From 5a099b84cc421ff1b7d86ae1461ef62c0cb9487c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 5 May 2026 14:57:26 +0000 Subject: [PATCH 01/11] test-tool: add a "historian" subcommand for building merge fixtures The merge-replay tests added in a follow-up commit need a way to set up specific topologies with full control over blob contents, parent order, and per-side trees. Sequencing plumbing commands or driving plain `git fast-import` from shell quickly becomes unreadable for the kinds of scenarios that exercise non-trivial merge resolution (textual conflicts, semantic edits outside the conflict region, intentional limitations such as new content on one side). Add a small `test-tool historian` subcommand that reads a tight, shell-quoted, one-line-per-object DSL and feeds an equivalent stream to a `git fast-import` child process. Each blob and commit is given a logical name; the helper allocates fast-import marks on first use and emits a lightweight tag for every commit so tests can refer to the resulting object via `refs/tags/`. The DSL has just two directives: blob NAME LINE... commit NAME BRANCH SUBJECT [from=NAME] [merge=NAME]... [PATH=BLOB]... A blob's content is the listed lines joined with `\n` (and a final `\n`); a commit's tree is exactly the listed PATH=BLOB pairs (the helper emits a `deleteall` so nothing leaks in from the implicit parent). Token splitting is delegated to `split_cmdline()` so quoted arguments work as in shell. Marks for parent references and file contents go through the same `strintmap`-backed name resolver, which keeps the helper itself trivially small: blob writing, tree construction, commit creation and merge-base computation are all handled by `git fast-import`. Note that the DSL reserves the names `from` and `merge` (with a trailing `=`) for parent specification; a tree path called `from` or `merge` cannot be expressed via this helper. That is acceptable here because every input is a tightly controlled test fixture and the filenames are chosen by the test author. The helper trusts its caller: malformed input results in a fast-import error rather than a friendly diagnostic. Wire the new subcommand into the Makefile and meson build, register it in `t/helper/test-tool.{c,h}`. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- Makefile | 1 + t/helper/meson.build | 1 + t/helper/test-historian.c | 196 ++++++++++++++++++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + 5 files changed, 200 insertions(+) create mode 100644 t/helper/test-historian.c diff --git a/Makefile b/Makefile index cedc234173e377..b38678b484ac6a 100644 --- a/Makefile +++ b/Makefile @@ -832,6 +832,7 @@ TEST_BUILTINS_OBJS += test-hash-speed.o TEST_BUILTINS_OBJS += test-hash.o TEST_BUILTINS_OBJS += test-hashmap.o TEST_BUILTINS_OBJS += test-hexdump.o +TEST_BUILTINS_OBJS += test-historian.o TEST_BUILTINS_OBJS += test-json-writer.o TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o TEST_BUILTINS_OBJS += test-match-trees.o diff --git a/t/helper/meson.build b/t/helper/meson.build index 675e64c0101b61..704edd1e1faf79 100644 --- a/t/helper/meson.build +++ b/t/helper/meson.build @@ -29,6 +29,7 @@ test_tool_sources = [ 'test-hash.c', 'test-hashmap.c', 'test-hexdump.c', + 'test-historian.c', 'test-json-writer.c', 'test-lazy-init-name-hash.c', 'test-match-trees.c', diff --git a/t/helper/test-historian.c b/t/helper/test-historian.c new file mode 100644 index 00000000000000..1385ec05d68a84 --- /dev/null +++ b/t/helper/test-historian.c @@ -0,0 +1,196 @@ +/* + * Build a small history out of a tiny declarative input. Used by tests + * that need specific merge topologies without long sequences of + * plumbing commands or fragile shell helpers. + * + * The historian reads stdin line by line and emits an equivalent + * stream to a `git fast-import` child process. It also allocates marks + * for named objects so tests can refer to commits and blobs by name. + * + * Input directives (one per line, shell-style quoting): + * + * blob NAME LINE1 LINE2 ... + * Each LINE becomes a content line in the blob; lines are + * joined with '\n' and the blob ends with a final '\n'. With + * no LINEs, the blob is empty. + * + * commit NAME BRANCH SUBJECT [parent=PARENT]... [PATH:BLOB]... + * Creates a commit on refs/heads/BRANCH using the listed + * `PATH:BLOB` mappings as the entire tree (no inheritance + * from parents). Zero or more `parent=` arguments may be + * given in order: the first becomes fast-import's `from`, + * each subsequent one becomes a `merge` parent. Without any + * `parent=` the commit defaults to the current branch tip, + * or becomes a root if BRANCH has no tip yet. + * + * Each `commit NAME` directive also creates a lightweight tag + * `refs/tags/NAME` so tests can `git rev-parse NAME`. + * + * This helper trusts its caller; malformed input results in fast-import + * errors. That is fine because test scripts feed it tightly controlled + * input. + */ + +#define USE_THE_REPOSITORY_VARIABLE + +#include "test-tool.h" +#include "git-compat-util.h" +#include "alias.h" +#include "run-command.h" +#include "setup.h" +#include "strbuf.h" +#include "strmap.h" +#include "strvec.h" + +static int next_mark = 1; + +static int resolve_mark(struct strintmap *names, const char *name) +{ + int n = strintmap_get(names, name); + if (!n) { + n = next_mark++; + strintmap_set(names, name, n); + } + return n; +} + +static void emit_data(FILE *out, const char *data, size_t len) +{ + fprintf(out, "data %"PRIuMAX"\n", (uintmax_t)len); + fwrite(data, 1, len, out); + fputc('\n', out); +} + +static void emit_blob(FILE *out, struct strintmap *names, + int argc, const char **argv) +{ + struct strbuf content = STRBUF_INIT; + int n = resolve_mark(names, argv[1]); + int i; + + for (i = 2; i < argc; i++) { + strbuf_addstr(&content, argv[i]); + strbuf_addch(&content, '\n'); + } + + fprintf(out, "blob\nmark :%d\n", n); + emit_data(out, content.buf, content.len); + strbuf_release(&content); +} + +static void emit_tag(FILE *out, const char *name, int mark) +{ + fprintf(out, "reset refs/tags/%s\nfrom :%d\n\n", name, mark); +} + +static void emit_commit(FILE *out, struct strintmap *names, + int argc, const char **argv, int seq) +{ + int n = resolve_mark(names, argv[1]); + const char *branch = argv[2]; + const char *subject = argv[3]; + const char *rest; + int seen_parent = 0; + int i; + + fprintf(out, "commit refs/heads/%s\nmark :%d\n", branch, n); + fprintf(out, "author A %d +0000\n", 1700000000 + seq); + fprintf(out, "committer A %d +0000\n", 1700000000 + seq); + emit_data(out, subject, strlen(subject)); + + /* + * fast-import requires `from` and `merge` to precede all file + * operations; emit them first regardless of argv ordering. The + * first `parent=` becomes `from`, subsequent ones become + * `merge`, in the order given. + */ + for (i = 4; i < argc; i++) { + if (!skip_prefix(argv[i], "parent=", &rest)) + continue; + fprintf(out, "%s :%d\n", + seen_parent ? "merge" : "from", + resolve_mark(names, rest)); + seen_parent = 1; + } + + /* + * The PATH:BLOB list is the entire tree; wipe whatever the + * implicit parent contributed before re-applying it. + */ + fprintf(out, "deleteall\n"); + for (i = 4; i < argc; i++) { + const char *colon; + size_t key_len; + char *path; + + if (skip_prefix(argv[i], "parent=", &rest)) + continue; + colon = strchr(argv[i], ':'); + if (!colon) + die("bad commit spec '%s' (need PATH:BLOB or parent=NAME)", + argv[i]); + key_len = colon - argv[i]; + path = xmemdupz(argv[i], key_len); + fprintf(out, "M 100644 :%d %s\n", + resolve_mark(names, colon + 1), path); + free(path); + } + + fputc('\n', out); + emit_tag(out, argv[1], n); +} + +int cmd__historian(int argc, const char **argv UNUSED) +{ + struct child_process fi = CHILD_PROCESS_INIT; + struct strintmap names = STRINTMAP_INIT; + struct strbuf line = STRBUF_INIT; + int seq = 0; + int ret = 0; + FILE *fi_in; + + if (argc != 1) + die("usage: test-tool historian = 2 && !strcmp(a[0], "blob")) + emit_blob(fi_in, &names, n, a); + else if (n >= 4 && !strcmp(a[0], "commit")) + emit_commit(fi_in, &names, n, a, seq++); + else + die("unknown directive: %s", a[0]); + + free(a); + } + + if (fclose(fi_in)) + die_errno("close fast-import stdin"); + if (finish_command(&fi)) + ret = 1; + + strbuf_release(&line); + strintmap_clear(&names); + return ret; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index a7abc618b3887e..28bde98ce1b76f 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -39,6 +39,7 @@ static struct test_cmd cmds[] = { { "hashmap", cmd__hashmap }, { "hash-speed", cmd__hash_speed }, { "hexdump", cmd__hexdump }, + { "historian", cmd__historian }, { "json-writer", cmd__json_writer }, { "lazy-init-name-hash", cmd__lazy_init_name_hash }, { "match-trees", cmd__match_trees }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 7f150fa1eb9ad2..78cec8594aac1f 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -32,6 +32,7 @@ int cmd__getcwd(int argc, const char **argv); int cmd__hashmap(int argc, const char **argv); int cmd__hash_speed(int argc, const char **argv); int cmd__hexdump(int argc, const char **argv); +int cmd__historian(int argc, const char **argv); int cmd__json_writer(int argc, const char **argv); int cmd__lazy_init_name_hash(int argc, const char **argv); int cmd__match_trees(int argc, const char **argv); From 0ed83a219ff8b11c341cc6b916c892403659bea0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 13 May 2026 17:32:58 +0000 Subject: [PATCH 02/11] t3454: introduce the merge-replaying scenario tests as baseline Before adding support for replaying merge commits to `git replay` and `git history`, spell out the desired behavior. Each test describes a self-contained topology built with the `historian` helper and asserts a single property of the replayed merge. The feature surface covered: * Clean merges (each side touches unrelated files): the replay must reconstruct the merge and leave the user's resolution intact. * Non-trivial merges with a manual conflict resolution or a semantic edit outside the conflict region: the user's contribution must survive the replay. * Octopus merges in the rewrite path are rejected loudly rather than corrupted or treated as 2-parent merges by mistake. * Merges whose first parent sits outside the rewrite range keep that parent as-is while only the in-range parent is rewritten. * Symbol-level intent is not extrapolated: a rename the user did inside the original merge is preserved on files that existed at the time, but files that only enter the tree via the rebased parents are not retroactively renamed. * A conflict that the rebased parent newly introduces against the unchanged parent (a region the original merge did not conflict on) is reported as a content conflict, not silently embedded as plain marker bytes in the replayed commit. The same property must hold when the affected file is routed through a binary merge driver. * A file whose plain content happens to contain conflict-marker byte sequences is left alone, not mistaken for a real conflict. * Under `merge.conflictStyle=diff3`, a conflict that the original merge already resolved still collapses cleanly through the outer replay merge even when the inner remerges of the original and the rebased parents have different merge bases (which makes the abbreviated-OID line in the diff3 marker differ between them). Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- t/meson.build | 1 + t/t3454-history-merges.sh | 523 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 524 insertions(+) create mode 100755 t/t3454-history-merges.sh diff --git a/t/meson.build b/t/meson.build index 7528e5cda5fef0..25b0119d4360eb 100644 --- a/t/meson.build +++ b/t/meson.build @@ -397,6 +397,7 @@ integration_tests = [ 't3450-history.sh', 't3451-history-reword.sh', 't3452-history-split.sh', + 't3454-history-merges.sh', 't3500-cherry.sh', 't3501-revert-cherry-pick.sh', 't3502-cherry-pick-merge.sh', diff --git a/t/t3454-history-merges.sh b/t/t3454-history-merges.sh new file mode 100755 index 00000000000000..5537d73e104dac --- /dev/null +++ b/t/t3454-history-merges.sh @@ -0,0 +1,523 @@ +#!/bin/sh + +test_description='git replay and git history across merge commits + +Exercises the merge-replay path in `git history reword`, +`git history split`, and `git replay` using the `test-tool +historian` fixture builder so each scenario is described in a +small declarative input rather than a sprawling sequence of +plumbing commands. +' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh + +# Replace the commit's message via a fake editor and run reword. +reword_to () { + GIT_EDITOR="echo \"$1\" >" \ + git history reword "$2" +} + +build_clean_merge () { + test-tool historian <<-\EOF + # Setup: + # A (a) --- C (a, h) ----+--- M (a, g, h) + # \ / + # +-- B (a, g) ------+ + # + # Topic touches `g` only; main touches `h` only. The auto-merge + # at M is clean. + blob a "shared content" + blob g guarded + blob h host + commit A main "A" a:a + commit B topic "B (introduces g)" parent=A a:a g:g + commit C main "C (introduces h)" parent=A a:a h:h + commit M main "Merge topic" parent=C parent=B a:a g:g h:h + EOF +} + +test_expect_failure 'clean merge: both sides touch unrelated files' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_clean_merge && + + reword_to "AA" A && + + # The merge is still a 2-parent merge with the same subject + # and tree (clean replay leaves content unchanged). + test_cmp_rev HEAD^{tree} M^{tree} && + test_cmp_rev HEAD^^{tree} M^^{tree} && + test_cmp_rev HEAD^2^{tree} M^2^{tree} && + + echo "Merge topic" >expect && + git log -1 --format=%s HEAD >actual && + test_cmp expect actual + ) +' + +build_textual_resolution () { + test-tool historian <<-\EOF + # Both sides change the same line of `a`; the user resolved with + # their own combined text, recorded directly as the merge tree. + blob a_v1 line1 line2 line3 + blob a_main line1 line2-main line3 + blob a_topic line1 line2-topic line3 + blob a_resolution line1 line2-merged-by-hand line3 + commit A main "A" a:a_v1 + commit B topic "B (line2 on topic)" parent=A a:a_topic + commit C main "C (line2 on main)" parent=A a:a_main + commit M main "Merge topic" parent=C parent=B a:a_resolution + EOF +} + +test_expect_failure 'non-trivial merge: textual manual resolution is preserved' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_textual_resolution && + + reword_to "AA" A && + + test_write_lines line1 line2-merged-by-hand line3 >expect && + git show HEAD:a >actual && + test_cmp expect actual + ) +' + +build_semantic_edit () { + test-tool historian <<-\EOF + # Topic and main conflict on line2 of `a`. The user's resolution + # at M not only picks combined text on line2 but ALSO touches + # line5 (a "semantic" edit outside any conflict region) -- this + # kind of edit is invisible to a naive pick-one-side strategy and + # must be preserved by replay. + blob a_v1 line1 line2 line3 line4 line5 + blob a_main line1 line2-main line3 line4 line5 + blob a_topic line1 line2-topic line3 line4 line5 + blob a_resolution line1 line2-merged line3 line4 line5-touched + commit A main "A" a:a_v1 + commit B topic "B (line2 on topic)" parent=A a:a_topic + commit C main "C (line2 on main)" parent=A a:a_main + commit M main "Merge topic" parent=C parent=B a:a_resolution + EOF +} + +test_expect_failure 'non-trivial merge: semantic edit outside conflict region is preserved' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_semantic_edit && + + reword_to "AA" A && + + test_write_lines line1 line2-merged line3 line4 line5-touched \ + >expect && + git show HEAD:a >actual && + test_cmp expect actual + ) +' + +build_octopus () { + test-tool historian <<-\EOF + blob a "x" + commit A main "A" a:a + commit B b1 "B" parent=A a:a + commit C b2 "C" parent=A a:a + commit D b3 "D" parent=A a:a + commit O main "octopus" parent=A parent=B parent=C parent=D a:a + EOF +} + +test_expect_failure 'octopus merge in the rewrite path is rejected' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_octopus && + + test_must_fail git -c core.editor=true history reword \ + --dry-run A 2>err && + test_grep "octopus" err + ) +' + +build_with_boundary_other_than_onto () { + test-tool historian <<-\EOF + # Setup a topology where the rewrite range crosses a 2-parent merge + # whose first parent sits outside that range: + # + # _------- O (a=O) + # / \ + # / M (parent1=O, parent2=R, a=M, s=top) + # / / + # X (a=X) - A (a=A) -- R (a=R) -- T (a=T, s=top) + # | + # reword target + # + # The walk for `history reword A` excludes A and its ancestors, + # so O sits outside the rewrite range and is not the boundary + # either. Replaying M correctly requires that first parent to + # remain at O (preserve, not replant). + blob X X + blob O O + blob A A + blob R R + blob T T + blob M M + blob top "marker" + commit X main "X" a:X + commit O side "O" parent=X a:O + commit A main "A" parent=X a:A + commit R main "R" parent=A a:R + commit T off "T" parent=R a:T s:top + commit M main "Merge side into main" parent=O parent=R a:M s:top + EOF +} + +# A descendant merge whose first parent sits outside the rewrite +# range is a topology that any reasonable replay of merges has to +# handle correctly: the first parent must be preserved verbatim, +# while the in-range second parent is rewritten. Without that, the +# replayed merge would silently graft itself onto a different +# ancestry than the author chose, which is far worse than a loud +# failure. +test_expect_failure 'merge whose first parent sits outside the rewrite range keeps that parent' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_with_boundary_other_than_onto && + + reword_to "AA" A && + + # The replayed M (i.e. the tip of main) is still a 2-parent + # merge. Its first parent is the original O (preserved, + # outside the rewrite range), its second parent is the + # rewritten R. + test_cmp_rev ! main M && + test_cmp_rev main^ O && + test_cmp_rev ! main^2 R && + test_cmp_rev main^2^{tree} R^{tree} && + + # The `off` branch is changed, the `side` branch is not + test_cmp_rev side O && + test_cmp_rev ! off T + ) +' + +build_function_rename () { + test-tool historian <<-\EOF + # Truly exercise the R/O/N algorithm: The topic renames harry() to + # hermione() (def plus caller1). main adds caller2 calling harry(); + # the original merge M manually renames caller2 to hermione(). The + # "newer" base on a side branch contains caller2 AND a brand-new + # caller3 calling harry(); replaying onto `newer` therefore introduces + # caller3 into the merged tree. + blob def_harry "void harry(void);" + blob def_hermione "void hermione(void);" + blob harry_call "harry();" + blob hermione_call "hermione();" + commit A main "A" def:def_harry caller1:harry_call + commit B topic "B (rename)" parent=A def:def_hermione caller1:hermione_call + commit C main "C (caller2 calls harry)" parent=A def:def_harry caller1:harry_call caller2:harry_call + commit M main "Merge topic" parent=C parent=B def:def_hermione caller1:hermione_call caller2:hermione_call + commit NEW newer "newer base with caller3" parent=C def:def_harry caller1:harry_call caller2:harry_call caller3:harry_call + EOF +} + +# This case checks two things at once. First, the manual semantic edit in M +# (renaming caller2) must be preserved when we replay onto a different base; +# that is the case `git replay` needs to handle correctly, even though nothing +# in the conflict markers tells us about it. Second, a file that only enters +# the tree via the rewritten parents (caller3, present on the `newer` base) is +# _not_ renamed by the replay. The replay propagates the textual diffs the user +# actually made in M; it does _not_ infer the user's symbol-level intent +# ("rename every caller of harry"). This is a known and intentional +# limitation. Symbol-aware refactoring is out of scope here, just as it is for +# plain rebases. +test_expect_failure 'preserves resolutions; does not extrapolate' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_function_rename && + + # Replay M onto the newer base. + git replay --ref-action=print \ + --onto NEW --ancestry-path C..main >out && + new_tip=$(cut -f 3 -d " " out) && + + # def and caller1 came from B (clean cherry-pick of the rename + # commit) and must reflect the rename. + git show $new_tip:def >actual && + test_grep hermione actual && + + git show $new_tip:caller1 >actual && + test_grep hermione actual && + + # caller2 existed in the original M; its manual rename to + # hermione() is the semantic edit the replay must preserve. + git show $new_tip:caller2 >actual && + test_grep hermione actual && + + # caller3 only exists on the newer base, so it was brought + # in by N (the auto-merge of the rebased parents). The + # replay has no way to know the user intended to rename + # every caller; caller3 keeps harry(). The resulting tree + # is therefore _not_ symbol-correct and needs a follow-up + # edit. This is the documented limitation. + git show $new_tip:caller3 >actual && + test_grep ! hermione actual + ) +' + +build_diverging_conflicts () { + test-tool historian <<-\EOF + # Topology where only one parent of the merge sits on the + # rewrite path: + # + # _-- D --_ + # / \ + # A - B - C - M + # \ + # E + # + # D is on branch `feature`, child of A. + # E is on branch `target`, child of A. + # M = merge(C, D), manual resolution. + # + # D and E conflict in different ways than C and D, therefore + # the resolution in M cannot cover the first conflict and + # replaying B..M onto E must fail. + blob a L1 L2 L3 L4 L5 + blob c L1 L2 _3 L4 L5 + blob d _1 L2 +3 L4 _5 + blob m _1 L2 M3 L4 _5 + blob e L1 L2 L3 L4 +5 + commit A main "A" a:a + commit B main "B (no change)" parent=A a:a + commit C main "C (changes line 3 only)" parent=B a:c + commit D feature "D (changes all odd lines)" parent=A a:d + commit M main "Merge feature into main" parent=C parent=D a:m + commit E target "E (changes line 5)" parent=A a:e + EOF +} + +test_expect_failure 'newly-introduced conflict in a replayed merge' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_diverging_conflicts && + + test_must_fail git replay --ref-action=print --branches \ + --onto E --ancestry-path B..M >out 2>err && + + # Discriminate against the "git replay refuses 2-parent + # merges entirely" failure mode that earlier commits + # in this series exhibit: the test must fail because of + # a real content conflict on `b`, not for any other + # reason. + test_grep "CONFLICT (content)" err && + test_grep "Merge conflict in b" err + ) +' + +build_new_conflict_binary () { + test-tool historian <<-\EOF + # New conflicts are routed through the binary merge driver. + blob attrs "* -text" + blob bin_orig original + blob bin_D dee + blob bin_E ehh + blob misc_orig "misc orig" + blob misc_C "misc changed by C" + commit A main "A" .gitattributes:attrs bin:bin_orig misc:misc_orig + commit B main "B (no change)" parent=A .gitattributes:attrs bin:bin_orig misc:misc_orig + commit C main "C (changes misc)" parent=B .gitattributes:attrs bin:bin_orig misc:misc_C + commit D feature "D (changes bin)" parent=A .gitattributes:attrs bin:bin_D misc:misc_orig + commit M main "Merge feature" parent=C parent=D .gitattributes:attrs bin:bin_D misc:misc_C + commit E target "E (changes bin)" parent=A .gitattributes:attrs bin:bin_E misc:misc_orig + EOF +} + +test_expect_failure 'newly-introduced binary conflict is reported' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_new_conflict_binary && + + test_must_fail git replay --ref-action=print --branches \ + --onto E --ancestry-path B..M >out 2>err && + + # Discriminate against the "git replay refuses 2-parent + # merges entirely" failure mode (no conflict message at + # all). The replay must fail because of a real content + # conflict on `bin`, not for any other reason. + test_grep "CONFLICT (content)" err && + test_grep "Merge conflict in bin" err + ) +' + +build_vanishing_conflict () { + test-tool historian <<-\EOF + # B + # / \ + # A - C - M + # \ + # D - X (= cherry-picked B) + # + # X cherry-picks B on top of D that replaying C..M onto X + # no longer has a merge conflict + blob a L1 L2 L3 L4 + blob b L1 L2 _3 L4 + blob c L1 L2 +3 L4 + blob m _1 L2 _3 L4 + blob d L1 L2 +3 L4 L5 + blob x _1 L2 _3 L4 L5 + commit A main "A" a:a + commit B main "B (changes line 3)" parent=A a:b + commit C feature "C (changes line 3 differently)" parent=A a:c + commit M feature "Merge C and B (fix lines 1 and 3)" parent=B parent=C a:m + commit D feature "D (changes line 3 like B, adds a line)" parent=C a:d + commit X feature "X (cherry-picks B with fixup)" parent=D a:x + EOF +} + +test_expect_failure 'vanishing conflict in a replayed merge' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_vanishing_conflict && + + git replay --ref-action=print --branches \ + --onto X --ancestry-path C..M >out && + new_tip=$(cut -f 3 -d " " out) && + + # Since X already includes the changes of B, it is tree-same + # to the replayed merge + test_cmp_rev "$new_tip:a" X:a + ) +' + +build_fixture_with_marker_text () { + test-tool historian <<-\EOF + # A file `doc/markers.txt` whose plain content happens to + # contain the byte sequences `<<<<<<<`, `=======`, and + # `>>>>>>>` (e.g. a documentation page describing how Git + # writes conflict markers). The merge replay must NOT mistake + # those literal bytes for a real conflict on the path. + # + # The topology runs an actual merge replay so the regression + # guard means something: file `a` is the one that genuinely + # gets merged, and `doc/markers.txt` rides along unchanged + # across the history. A correct implementation can tell that + # `doc/markers.txt` was never the subject of an unresolved + # inner merge and leaves it alone. + blob a L1 L2 L3 + blob c L1 L2 _3 + blob d _1 L2 +3 + blob m _1 L2 M3 + blob markers "Example" "<<<<<<< ours" "first variant" "=======" "second variant" ">>>>>>> theirs" "EOF" + commit A main "A" a:a + commit B main "B (no a change)" parent=A a:a + commit C main "C (changes a only)" parent=B a:c + commit D feature "D (changes a only)" parent=A a:d + commit M main "Merge feature" parent=C parent=D a:m + commit E target "E (no a change)" parent=A a:a doc:markers + EOF +} + +test_expect_failure 'file containing literal conflict-marker bytes survives a replay-merge' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + build_fixture_with_marker_text && + + git replay --ref-action=print --branches \ + --onto E --ancestry-path B..M >out && + new_tip=$(cut -f 3 -d " " out) && + test -n "$new_tip" && + + # The fixture file is byte-for-byte identical across the + # whole history. Nothing about replay should have touched it. + test_cmp_rev "$new_tip:doc" E:doc + ) +' + +build_resolution_preserved_under_diff3 () { + test-tool historian <<-\EOF + # Topology where the inner remerge of (C, D) and the inner + # remerge of (C', D) have DIFFERENT merge bases: the original + # pair's merge base is A (close, on main), while the rewritten + # pair's merge base is R (far, where the long-lived target + # branch diverged before A). + # + # R ---- E (target branch) + # \ + # A ----+---- B ---- C ---- M (main) + # \ / + # +------ D --------+ (feature) + # + # Under `merge.conflictStyle=diff3`, conflict markers embed + # the merge base's abbreviated OID. When the inner remerges + # have different bases, the abbreviated-OID line in the + # markers differs even though both inner remerges produce + # byte-identical content for the conflicting region. A naive + # implementation lets that label difference look like a real + # delta between the two inner remerges and reports a spurious + # conflict where the original resolution should have collapsed + # through. The replay must pin the ancestor label across the + # two inner remerges so byte-identical conflicting content + # really compares as byte-identical. + # + # E touches a file `b` so that C' (cherry-pick of C onto E) + # carries a different tree from C; otherwise the trivial + # tree-equal fast path in the outer pick collapses everything + # and the diff3 marker text is never exercised. + blob a L1 L2 L3 + blob c L1 L2 _3 + blob d L1 L2 +3 + blob m L1 L2 M3 + blob b b_orig + blob e b_E_only + commit R main "R (root)" a:a b:b + commit A main "A" parent=R a:a b:b + commit B main "B (no change)" parent=A a:a b:b + commit C main "C (changes a)" parent=B a:c b:b + commit D feature "D (changes a differently)" parent=A a:d b:b + commit M main "Merge feature into main" parent=C parent=D a:m b:b + commit E target "E (changes b only)" parent=R a:a b:e + EOF +} + +test_expect_failure 'previously-resolved conflicts with diff3' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git config merge.conflictStyle diff3 && + build_resolution_preserved_under_diff3 && + + git replay --ref-action=print --branches \ + --onto E --ancestry-path B..M >out && + new_tip=$(cut -f 3 -d " " out) && + test -n "$new_tip" && + + test_write_lines L1 L2 M3 >expect && + git show "$new_tip:a" >actual && + test_cmp expect actual + ) +' + +test_done From 5855a84f8c2d651389782e80d8c02e16f1c6dbf0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 13 May 2026 18:01:37 +0000 Subject: [PATCH 03/11] xdiff, merge-ll, merge-ort: optionally record conflicts' byte offsets The imminent 2-parent merge replay support implements a merge commit's replay as an outer three-way merge between three trees the inner machinery already knows how to compute: the auto-merge of the original parents, the original merge's actual tree, and the auto-merge of the rebased parents. Either of the auto-merges can carry unresolved conflicts -- residual conflict markers from the inner xdl_merge calls, or a non-resolution from a binary or custom driver -- and the outer merge has no way of telling those conflict bytes apart from any other content unless the inner merge that produced them tells it. Let the xdiff machinery optionally populate an array of the ranges where conflicts were recorded. A `struct xdl_conflict_interval` is a half-open byte range identifying one complete conflict-marker block in the result buffer that xdl_merge produced; a `struct xdl_conflict_intervals` is the growable list of those, owned by the caller and released with the new xdl_conflict_intervals_release() helper. merge-ort's collect_merge_info_callback() and process_entry() both have shortcuts that take side2's blob verbatim when side1 matches the merge base. Both now consult in_replay_side2_intervals: if the strmap has an entry for the path, the shortcut is skipped (collect_merge_info_callback) or the result is marked conflicted with a CONFLICT (content) message (process_entry). For the non-xdiff drivers (binary, custom, union, ours/theirs) merge_3way() stores an empty interval list with `nr == 0` paired with a non-clean ll_merge status, so the presence-not-content check above also catches a fresh binary conflict for which no marker bytes were emitted. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- merge-ll.c | 3 ++ merge-ll.h | 14 ++++++++ merge-ort.c | 79 +++++++++++++++++++++++++++++++++++++++++--- merge-ort.h | 33 +++++++++++++++++++ xdiff/xdiff.h | 51 +++++++++++++++++++++++++++++ xdiff/xmerge.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 259 insertions(+), 10 deletions(-) diff --git a/merge-ll.c b/merge-ll.c index fafe2c91971856..48c97c8aec6cf9 100644 --- a/merge-ll.c +++ b/merge-ll.c @@ -141,6 +141,9 @@ static enum ll_merge_result ll_xdl_merge(const struct ll_merge_driver *drv_unuse xmp.ancestor = orig_name; xmp.file1 = name1; xmp.file2 = name2; + xmp.out_intervals = opts->out_intervals; + xmp.in_orig_intervals = opts->in_orig_intervals; + xmp.in_side2_intervals = opts->in_side2_intervals; status = xdl_merge(orig, src1, src2, &xmp, result); ret = (status > 0) ? LL_MERGE_CONFLICT : status; return ret; diff --git a/merge-ll.h b/merge-ll.h index d038ee0c1e81f7..ce51331f196a39 100644 --- a/merge-ll.h +++ b/merge-ll.h @@ -83,6 +83,20 @@ struct ll_merge_options { /* Extra xpparam_t flags as defined in xdiff/xdiff.h. */ long xdl_opts; + + /* + * Forwarded onto xmparam_t. See xdiff/xdiff.h:s_xmparam. + * `out_intervals`, when set, receives the byte ranges of + * every conflict-marker hunk xdl_merge writes into the + * result buffer. `in_orig_intervals` and + * `in_side2_intervals` request the newly-introduced-conflict + * detection in the outer merge of a replayed merge commit; + * setting `in_side2_intervals` to a non-NULL pointer is the + * opt-in. + */ + struct xdl_conflict_intervals *out_intervals; + struct xdl_conflict_intervals *in_orig_intervals; + struct xdl_conflict_intervals *in_side2_intervals; }; #define LL_MERGE_OPTIONS_INIT { .conflict_style = -1 } diff --git a/merge-ort.c b/merge-ort.c index 00923ce3cd749b..e04e1ceee095a5 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1371,13 +1371,31 @@ static int collect_merge_info_callback(int n, * files, then we can use side2 as the resolution. We cannot * necessarily do so this for trees, because there may be rename * destinations within side2. + * + * Under the replay-merge interval side channel, however, this + * shortcut would silently install side2's blob even when an + * earlier inner merge left an unresolved conflict on the path: + * the conflict-marker bytes (or, for non-textual drivers, the + * unresolved binary blob) would land in the result verbatim. + * Skip the shortcut when a side-channel entry says side2 is + * unresolved for this path; the path then falls through to the + * full content merge below, where xdl_merge or the loud-gate + * for non-xdiff drivers will surface the conflict. */ if (side1_matches_mbase && filemask == 0x07) { - /* use side2 version as resolution */ - setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, names+2, side2_null, 0, - filemask, dirmask, 1); - return mask; + int side2_unresolved = 0; + + if (opt->in_replay_side2_intervals && + strmap_get(opt->in_replay_side2_intervals, fullpath)) + side2_unresolved = 1; + + if (!side2_unresolved) { + /* use side2 version as resolution */ + setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+2, side2_null, 0, + filemask, dirmask, 1); + return mask; + } } /* Similar to above but swapping sides 1 and 2 */ @@ -2105,6 +2123,7 @@ static int merge_3way(struct merge_options *opt, { mmfile_t orig, src1, src2; struct ll_merge_options ll_opts = LL_MERGE_OPTIONS_INIT; + struct xdl_conflict_intervals out_intervals = { 0 }; char *base, *name1, *name2; enum ll_merge_result merge_status; @@ -2115,6 +2134,14 @@ static int merge_3way(struct merge_options *opt, ll_opts.extra_marker_size = extra_marker_size; ll_opts.xdl_opts = opt->xdl_opts; ll_opts.conflict_style = opt->conflict_style; + if (opt->out_replay_intervals) + ll_opts.out_intervals = &out_intervals; + if (opt->in_replay_orig_intervals) + ll_opts.in_orig_intervals = + strmap_get(opt->in_replay_orig_intervals, path); + if (opt->in_replay_side2_intervals) + ll_opts.in_side2_intervals = + strmap_get(opt->in_replay_side2_intervals, path); if (opt->priv->call_depth) { ll_opts.virtual_ancestor = 1; @@ -2157,6 +2184,28 @@ static int merge_3way(struct merge_options *opt, "warning: Cannot merge binary files: %s (%s vs. %s)", path, name1, name2); + if (opt->out_replay_intervals && + (out_intervals.nr > 0 || + merge_status == LL_MERGE_CONFLICT || + merge_status == LL_MERGE_BINARY_CONFLICT)) { + /* + * Inner content merge for this path emitted markers + * (recorded as intervals) or otherwise failed to + * resolve (binary driver or other non-textual + * driver). Either way, the path carries an + * unresolved inner conflict that the outer merge + * needs to know about; transfer ownership of the + * recorded intervals (possibly empty) into the + * caller-owned strmap. + */ + struct xdl_conflict_intervals *stored; + stored = xmalloc(sizeof(*stored)); + *stored = out_intervals; + strmap_put(opt->out_replay_intervals, path, stored); + } else { + xdl_conflict_intervals_release(&out_intervals); + } + free(base); free(name1); free(name2); @@ -4216,6 +4265,26 @@ static int process_entry(struct merge_options *opt, assert(othermask == 2 || othermask == 4); assert(ci->merged.is_null == (ci->filemask == ci->match_mask)); + + /* + * Replay-merge side channel: when this match_mask + * shortcut would have taken side2's blob verbatim + * (match_mask == 3 means stages 0 and 1 agree, + * stage 2 differs), but an earlier inner merge + * recorded that side2 is unresolved for this + * path, surface a content conflict instead of + * silently committing the inner-conflict blob. + */ + if (ci->merged.clean && side == 2 && + !ci->merged.is_null && + opt->in_replay_side2_intervals && + strmap_get(opt->in_replay_side2_intervals, path)) { + ci->merged.clean = 0; + path_msg(opt, CONFLICT_CONTENTS, 0, + path, NULL, NULL, NULL, + _("CONFLICT (content): Merge conflict in %s"), + path); + } } } else if (ci->filemask >= 6 && (S_IFMT & ci->stages[1].mode) != diff --git a/merge-ort.h b/merge-ort.h index 6045579825da8b..8b1545b009b078 100644 --- a/merge-ort.h +++ b/merge-ort.h @@ -87,6 +87,39 @@ struct merge_options { unsigned record_conflict_msgs_as_headers : 1; const char *msg_header_prefix; + /* + * Conflict-interval side channel for replay-merge. Each strmap, + * when non-NULL, maps a path to a heap-allocated + * `struct xdl_conflict_intervals *`. The caller owns both the + * strmaps and the value pointers. + * + * out_replay_intervals: populated by the per-path content + * merge. After ll_merge returns for a path, the recorded + * intervals are moved into a freshly allocated struct + * stored under the path. Paths whose inner content merge + * was clean do not appear; paths whose ll_merge call + * produced conflict markers (text driver) or returned a + * non-clean status (binary, custom, union, ours/theirs) + * do. + * + * in_replay_orig_intervals, in_replay_side2_intervals: + * looked up per path during the outer merge and forwarded + * into ll_merge_options.in_orig_intervals / + * ll_merge_options.in_side2_intervals so that xdl_merge + * can detect a conflict in mf2 (side2) that has no + * counterpart in orig. + * + * The fast paths in collect_merge_info_callback() and + * process_entry() that would otherwise take side2's blob + * verbatim also consult `in_replay_side2_intervals`: a + * present entry forces the path through the full content + * merge (or, when the entry was recorded for a non-xdiff + * driver, surfaces an explicit conflict). + */ + struct strmap *out_replay_intervals; + struct strmap *in_replay_orig_intervals; + struct strmap *in_replay_side2_intervals; + /* internal fields used by the implementation */ struct merge_options_internal *priv; }; diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index dc370712e92860..02314d1ef527ed 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -130,6 +130,30 @@ long xdl_mmfile_size(mmfile_t *mmf); int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t const *xecfg, xdemitcb_t *ecb); +/* + * A half-open byte range identifying one conflict-marker block + * (from the leading "<<<<<<<" line through the trailing newline of + * the matching ">>>>>>>" line) inside a buffer produced by + * xdl_merge. + */ +struct xdl_conflict_interval { + long start; /* offset of the first byte of the opener line */ + long end; /* offset one past the trailing newline of the closer line */ +}; + +/* + * A growable list of xdl_conflict_interval records, in order of + * appearance in the produced buffer. Initialize all-zero; release + * with xdl_conflict_intervals_release. + */ +struct xdl_conflict_intervals { + struct xdl_conflict_interval *items; + long nr; + long alloc; +}; + +void xdl_conflict_intervals_release(struct xdl_conflict_intervals *intervals); + typedef struct s_xmparam { xpparam_t xpp; int marker_size; @@ -139,6 +163,33 @@ typedef struct s_xmparam { const char *ancestor; /* label for orig */ const char *file1; /* label for mf1 */ const char *file2; /* label for mf2 */ + /* + * Side channel for detecting a fresh conflict introduced by + * mf2 (side2) that has no counterpart in orig (the merge + * base). Useful when xdl_merge is the outer merge of a + * replayed merge commit: orig and mf2 may themselves be the + * output of earlier xdl_merge calls that emitted + * conflict-marker hunks into their buffers, and the normal + * three-way merge cannot tell those literal marker bytes + * apart from any other text. + * + * out_intervals (output): when non-NULL, xdl_merge populates + * it with the byte range of every conflict-marker hunk it + * writes into the result buffer. Use this on the inner + * merges whose output will later be fed to an outer merge as + * orig or mf2. The caller initializes the struct all-zero + * and releases it with xdl_conflict_intervals_release. + * + * in_orig_intervals, in_side2_intervals (inputs): when + * in_side2_intervals is non-NULL, xdl_merge counts every + * recorded mf2 hunk that has no byte-equal counterpart in + * orig as an additional conflict in the returned status; if + * in_orig_intervals is NULL, every mf2 hunk is counted. Pass + * NULL on side2 to opt out. + */ + struct xdl_conflict_intervals *out_intervals; + struct xdl_conflict_intervals *in_orig_intervals; + struct xdl_conflict_intervals *in_side2_intervals; } xmparam_t; #define DEFAULT_CONFLICT_MARKER_SIZE 7 diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c index 29dad98c496b07..69991e303ecc02 100644 --- a/xdiff/xmerge.c +++ b/xdiff/xmerge.c @@ -197,12 +197,14 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, xdfenv_t *xe2, const char *name2, const char *name3, int size, int i, int style, - xdmerge_t *m, char *dest, int marker_size) + xdmerge_t *m, char *dest, int marker_size, + struct xdl_conflict_intervals *intervals) { int marker1_size = (name1 ? strlen(name1) + 1 : 0); int marker2_size = (name2 ? strlen(name2) + 1 : 0); int marker3_size = (name3 ? strlen(name3) + 1 : 0); int needs_cr = is_cr_needed(xe1, xe2, m); + int block_start = -1; if (marker_size <= 0) marker_size = DEFAULT_CONFLICT_MARKER_SIZE; @@ -211,6 +213,9 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0, dest ? dest + size : NULL); + if (dest && intervals) + block_start = size; + if (!dest) { size += marker_size + 1 + needs_cr + marker1_size; } else { @@ -277,6 +282,12 @@ static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, dest[size++] = '\r'; dest[size++] = '\n'; } + + if (dest && intervals && block_start >= 0) { + intervals->items[intervals->nr].start = block_start; + intervals->items[intervals->nr].end = size; + intervals->nr++; + } return size; } @@ -285,7 +296,8 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, const char *ancestor_name, int favor, xdmerge_t *m, char *dest, int style, - int marker_size) + int marker_size, + struct xdl_conflict_intervals *intervals) { int size, i; @@ -297,7 +309,7 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, size = fill_conflict_hunk(xe1, name1, xe2, name2, ancestor_name, size, i, style, m, dest, - marker_size); + marker_size, intervals); else if (m->mode & 3) { /* Before conflicting part */ size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0, @@ -664,23 +676,53 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, /* output */ if (result) { int marker_size = xmp->marker_size; + struct xdl_conflict_intervals *intervals = xmp->out_intervals; int size = xdl_fill_merge_buffer(xe1, name1, xe2, name2, ancestor_name, favor, changes, NULL, style, - marker_size); + marker_size, NULL); result->ptr = xdl_malloc(size); if (!result->ptr) { xdl_cleanup_merge(changes); return -1; } result->size = size; + if (intervals) { + xdmerge_t *p; + long n_conflicts = 0; + + for (p = changes; p; p = p->next) + if (p->mode == 0) + n_conflicts++; + intervals->nr = 0; + if (n_conflicts > 0 && + XDL_ALLOC_GROW(intervals->items, n_conflicts, + intervals->alloc)) { + xdl_free(result->ptr); + result->ptr = NULL; + result->size = 0; + xdl_cleanup_merge(changes); + return -1; + } + } xdl_fill_merge_buffer(xe1, name1, xe2, name2, ancestor_name, favor, changes, - result->ptr, style, marker_size); + result->ptr, style, marker_size, + intervals); } return xdl_cleanup_merge(changes); } +void xdl_conflict_intervals_release(struct xdl_conflict_intervals *intervals) +{ + if (!intervals) + return; + xdl_free(intervals->items); + intervals->items = NULL; + intervals->nr = 0; + intervals->alloc = 0; +} + int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2, xmparam_t const *xmp, mmbuffer_t *result) { @@ -727,6 +769,43 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2, &xe2, xscr2, xmp, result); } + + if (status >= 0 && xmp->in_side2_intervals) { + /* + * Count every conflict-marker hunk in mf2 whose bytes + * do not appear verbatim in orig. The result buffer + * already contains those bytes (xdl_do_merge applied + * the side2-only change, or the !xscr1 short-circuit + * copied mf2 wholesale); the only thing the caller + * needs from us is for the returned status to reflect + * them as real conflicts. + */ + const struct xdl_conflict_intervals *oi = xmp->in_orig_intervals; + const struct xdl_conflict_intervals *si = xmp->in_side2_intervals; + long i, j; + + for (i = 0; i < si->nr; i++) { + long s_len = si->items[i].end - si->items[i].start; + int matched = 0; + + if (oi) { + for (j = 0; j < oi->nr; j++) { + long o_len = oi->items[j].end - oi->items[j].start; + + if (o_len != s_len) + continue; + if (!memcmp((const char *)orig->ptr + oi->items[j].start, + (const char *)mf2->ptr + si->items[i].start, + s_len)) { + matched = 1; + break; + } + } + } + if (!matched) + status++; + } + } out: xdl_free_script(xscr1); xdl_free_script(xscr2); From ca2fda8aa40838e58baca880b2df2d19512bbfe4 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 5 May 2026 13:15:44 +0000 Subject: [PATCH 04/11] replay: support replaying 2-parent merges `git history` (introduced in v2.54) and the underlying `git replay` infrastructure both refused to walk past any commit with more than one parent, dying with "replaying merge commits is not supported yet!". For real history-rewriting work this is a showstopper: the natural fallback `git rebase --rebase-merges` is interactive and stops to ask for re-resolution even when no re-resolution is needed. Lift that limitation for the 2-parent case. A merge commit M on parents (P1, P2) records both an automatic merge of those parents AND any manual layer the author put on top of that automatic merge (textual conflict resolution and any semantic edit outside conflict markers). Replaying M onto rewritten parents (P1', P2') must preserve that manual layer, but the rewritten parents change the automatic merge, so a simple cherry-pick is wrong: the manual layer would land on top of stale auto-merge text. What works instead is a three-way merge of three trees the existing infrastructure already knows how to compute. Let R be the recursive auto-merge of (P1, P2), O be M's actual tree and N be the recursive auto-merge of (P1', P2'). `git diff R O` is morally `git show --remerge-diff M`: it captures exactly what the author added on top of the automatic merge. A non-recursive 3-way merge with R as the merge base, O as side 1 and N as side 2 layers that manual contribution onto the freshly auto-merged rewritten parents and produces the replayed tree. Implement `pick_merge_commit()` along those lines and dispatch to it from `replay_revisions()` when the commit being replayed has exactly two parents. Three details keep the non-trivial cases sound: 1. R and N use identical `merge_options.branch1` / `branch2` labels ("ours"/"theirs"). When the original parents conflicted on a region of a file, both R and N produce textually identical conflict markers; the outer non-recursive merge then sees N equal to R in that region and the user's manual resolution from O wins cleanly. Without this, the conflict-marker text would differ between R and N (the inner merges would label the conflicts differently), and the outer merge would itself be unclean even when the user did supply a clean resolution. 2. An unclean inner merge (`result.clean == 0`) is not fatal: the tree merge-ort produces in that case still has well-defined contents (with conflict markers in the conflicted files) and is a valid input to the outer non-recursive merge. Only a real error (`< 0`) propagates as failure. 3. Each inner merge publishes the byte ranges of every conflict-marker hunk it emits via the new `out_replay_intervals` strmap on `merge_options`. The outer merge receives both maps via `in_replay_orig_intervals` and `in_replay_side2_intervals`, so xdl_merge can detect a hunk in N that has no byte-equal counterpart in R (a conflict the original parents did not have) and surface it as a real conflict rather than absorbing the marker bytes as plain content. The same channel makes merge-ort's trivial fast paths refuse to take side2's blob verbatim for paths whose inner merge was unresolved, which also covers paths handled by binary or other non-textual drivers. The replay propagates the textual diffs the user actually made in M; it does not extrapolate symbol-level intent. If rewriting the parents pulls in genuinely new content (for example, a brand-new caller of a function that the merge renamed), that new content stays as the rewritten parents have it. Symbol-aware refactoring is out of scope, just as it is for plain rebase. Octopus merges (more than two parents) and revert-of-merge are not supported and are surfaced as explicit errors at the dispatch point. The "split" sub-command of `git history` continues to refuse when its target is a merge; split semantics do not apply there. The pre-walk gate in `builtin/history.c` that previously rejected any merge in the rewrite path now only rejects octopus merges; rename it accordingly. The walk in replay_revisions() pre-seeds the rewritten-commit map with each negative-side ("BOTTOM") cmdline entry pointing at `onto`. This matters for merge replay: a 2-parent merge whose first parent is the boundary (e.g. the commit being reworded) must replay onto the rewritten boundary, but pick_merge_commit's mapped_commit fallback would return the original parent for anything not in the map. Pre-seeding the boundary makes the lookup return `onto`, which is the rewritten counterpart. Skip the pre-seed in revert mode, which chains reverts through last_commit and would short-circuit incorrectly. A small refactor in `create_commit()` makes the merge case possible: the helper now takes a `struct commit_list *parents` rather than a single parent pointer and takes ownership of the list. The single existing caller in `pick_regular_commit()` builds and passes a one-element list; the new `pick_merge_commit()` builds a two-element list, with parent order preserved. Update the negative expectations in t3451, t3452 and t3650 that were asserting the now-retired "not supported yet" message, replacing them with positive coverage where it fits. The 2-parent merge scenarios in t3454 (introduced as failing baselines in an earlier commit) flip to test_expect_success, with the exception of the `merge.conflictStyle=diff3` case that needs an additional ancestor-label fix in a subsequent commit. Helped-by: Elijah Newren Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- builtin/history.c | 16 ++- replay.c | 252 ++++++++++++++++++++++++++++++++++++-- t/t3451-history-reword.sh | 21 +++- t/t3452-history-split.sh | 6 +- t/t3454-history-merges.sh | 31 ++--- t/t3650-replay-basics.sh | 46 ++++++- 6 files changed, 328 insertions(+), 44 deletions(-) diff --git a/builtin/history.c b/builtin/history.c index 952693808574b7..00097b2226e042 100644 --- a/builtin/history.c +++ b/builtin/history.c @@ -195,15 +195,15 @@ static int parse_ref_action(const struct option *opt, const char *value, int uns return 0; } -static int revwalk_contains_merges(struct repository *repo, - const struct strvec *revwalk_args) +static int revwalk_contains_octopus_merges(struct repository *repo, + const struct strvec *revwalk_args) { struct strvec args = STRVEC_INIT; struct rev_info revs; int ret; strvec_pushv(&args, revwalk_args->v); - strvec_push(&args, "--min-parents=2"); + strvec_push(&args, "--min-parents=3"); repo_init_revisions(repo, &revs, NULL); @@ -217,7 +217,7 @@ static int revwalk_contains_merges(struct repository *repo, } if (get_revision(&revs)) { - ret = error(_("replaying merge commits is not supported yet!")); + ret = error(_("replaying octopus merges is not supported")); goto out; } @@ -289,7 +289,7 @@ static int setup_revwalk(struct repository *repo, strvec_push(&args, "HEAD"); } - ret = revwalk_contains_merges(repo, &args); + ret = revwalk_contains_octopus_merges(repo, &args); if (ret < 0) goto out; @@ -482,6 +482,9 @@ static int cmd_history_reword(int argc, if (ret < 0) { ret = error(_("failed replaying descendants")); goto out; + } else if (ret) { + ret = error(_("conflict during replay; some descendants were not rewritten")); + goto out; } ret = 0; @@ -721,6 +724,9 @@ static int cmd_history_split(int argc, if (ret < 0) { ret = error(_("failed replaying descendants")); goto out; + } else if (ret) { + ret = error(_("conflict during replay; some descendants were not rewritten")); + goto out; } ret = 0; diff --git a/replay.c b/replay.c index f96f1f6551ae63..5b7b9b4ac09a12 100644 --- a/replay.c +++ b/replay.c @@ -1,6 +1,7 @@ #define USE_THE_REPOSITORY_VARIABLE #include "git-compat-util.h" +#include "commit-reach.h" #include "environment.h" #include "hex.h" #include "merge-ort.h" @@ -11,6 +12,7 @@ #include "sequencer.h" #include "strmap.h" #include "tree.h" +#include "xdiff/xdiff.h" /* * We technically need USE_THE_REPOSITORY_VARIABLE for DEFAULT_ABBREV, but @@ -77,15 +79,21 @@ static void generate_revert_message(struct strbuf *msg, repo_unuse_commit_buffer(repo, commit, message); } +/* + * Build a new commit with the given tree and parent list, copying author, + * extra headers and (for pick mode) the commit message from `based_on`. + * + * Takes ownership of `parents`: it will be freed before returning, even on + * error. Parent order is preserved as supplied by the caller. + */ static struct commit *create_commit(struct repository *repo, struct tree *tree, struct commit *based_on, - struct commit *parent, + struct commit_list *parents, enum replay_mode mode) { struct object_id ret; struct object *obj = NULL; - struct commit_list *parents = NULL; char *author = NULL; char *sign_commit = NULL; /* FIXME: cli users might want to sign again */ struct commit_extra_header *extra = NULL; @@ -96,7 +104,6 @@ static struct commit *create_commit(struct repository *repo, const char *orig_message = NULL; const char *exclude_gpgsig[] = { "gpgsig", "gpgsig-sha256", NULL }; - commit_list_insert(parent, &parents); extra = read_commit_extra_headers(based_on, exclude_gpgsig); if (mode == REPLAY_MODE_REVERT) { generate_revert_message(&msg, based_on, repo); @@ -263,6 +270,20 @@ static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, return kh_value(replayed_commits, pos); } +static void release_replay_intervals_strmap(struct strmap *map) +{ + struct hashmap_iter iter; + struct strmap_entry *entry; + + strmap_for_each_entry(map, &iter, entry) { + struct xdl_conflict_intervals *intervals = entry->value; + + xdl_conflict_intervals_release(intervals); + free(intervals); + } + strmap_clear(map, 0); +} + static struct commit *pick_regular_commit(struct repository *repo, struct commit *pickme, kh_oid_map_t *replayed_commits, @@ -273,6 +294,7 @@ static struct commit *pick_regular_commit(struct repository *repo, { struct commit *base, *replayed_base; struct tree *pickme_tree, *base_tree, *replayed_base_tree; + struct commit_list *parents = NULL; if (pickme->parents) { base = pickme->parents->item; @@ -327,7 +349,169 @@ static struct commit *pick_regular_commit(struct repository *repo, if (oideq(&replayed_base_tree->object.oid, &result->tree->object.oid) && !oideq(&pickme_tree->object.oid, &base_tree->object.oid)) return replayed_base; - return create_commit(repo, result->tree, pickme, replayed_base, mode); + commit_list_insert(replayed_base, &parents); + return create_commit(repo, result->tree, pickme, parents, mode); +} + +/* + * Replay a 2-parent merge commit by composing three calls into merge-ort: + * + * R = recursive merge of pickme's two original parents (auto-remerge of + * the original merge, accepting any conflicts) + * N = recursive merge of the (possibly rewritten) parents + * O = pickme's tree (the user's actual merge, including any manual + * resolutions) + * + * The picked tree comes from a non-recursive merge using R as the base, + * O as side1 and N as side2: `git diff R O` is morally `git show + * --remerge-diff $oldmerge`, so this layers the user's original manual + * resolution on top of the freshly auto-merged rewritten parents. + * + * If the outer 3-way merge is unclean, propagate the conflict status to + * the caller via `result->clean = 0` and return NULL. The two inner + * merges (R and N) being unclean is _not_ fatal: the conflict-markered + * trees they produce are valid inputs to the outer merge, and using + * identical labels for both inner merges keeps the marker text + * byte-equal between R and N so the user's resolution recorded in O + * collapses the conflict cleanly there. + * + * The conflict-interval side channel from merge-ort goes a step + * further: each inner merge publishes the byte ranges of every + * conflict-marker hunk it emits (per path), and the outer merge + * consults those maps so a hunk that appears in N without an + * identical counterpart in R is reported as a real conflict instead + * of being silently embedded as plain marker bytes in the replayed + * commit. + * + * Octopus merges (more than two parents) and revert-of-merge are + * rejected by the caller before this function is invoked. + */ +static struct commit *pick_merge_commit(struct repository *repo, + struct commit *pickme, + kh_oid_map_t *replayed_commits, + struct merge_options *merge_opt, + struct merge_result *result) +{ + struct commit *parent1, *parent2; + struct commit *replayed_parent1, *replayed_parent2; + struct tree *pickme_tree; + struct merge_options remerge_opt = { 0 }; + struct merge_options new_merge_opt = { 0 }; + struct merge_result remerge_res = { 0 }; + struct merge_result new_merge_res = { 0 }; + struct commit_list *parent_bases = NULL; + struct commit_list *replayed_bases = NULL; + struct commit_list *parents; + struct commit *picked = NULL; + char *ancestor_name = NULL; + struct strmap r_intervals = STRMAP_INIT; + struct strmap n_intervals = STRMAP_INIT; + struct strmap *saved_orig_in; + struct strmap *saved_side2_in; + + parent1 = pickme->parents->item; + parent2 = pickme->parents->next->item; + + /* + * Map the merge's parents to their replayed counterparts. A parent + * outside the rewrite range has no entry in `replayed_commits` and + * must stay at its original commit, so use the parent itself as the + * fallback for both sides. + */ + replayed_parent1 = mapped_commit(replayed_commits, parent1, parent1); + replayed_parent2 = mapped_commit(replayed_commits, parent2, parent2); + + /* + * R: auto-remerge of the original parents. + * + * Use the same branch labels for the inner merges that compute R + * and N so conflict markers (if any) are textually identical + * between the two; the outer non-recursive merge can then collapse + * the manual resolution from O against them. Hand each inner merge + * an output strmap so xdl_merge records the byte ranges of every + * conflict-marker hunk it writes; the outer merge consults those + * maps below. + */ + init_basic_merge_options(&remerge_opt, repo); + remerge_opt.show_rename_progress = 0; + remerge_opt.branch1 = "ours"; + remerge_opt.branch2 = "theirs"; + remerge_opt.out_replay_intervals = &r_intervals; + if (repo_get_merge_bases(repo, parent1, parent2, &parent_bases) < 0) { + result->clean = -1; + goto out; + } + merge_incore_recursive(&remerge_opt, parent_bases, + parent1, parent2, &remerge_res); + parent_bases = NULL; /* consumed by merge_incore_recursive */ + if (remerge_res.clean < 0) { + result->clean = remerge_res.clean; + goto out; + } + + /* N: fresh merge of the (possibly rewritten) parents. */ + init_basic_merge_options(&new_merge_opt, repo); + new_merge_opt.show_rename_progress = 0; + new_merge_opt.branch1 = "ours"; + new_merge_opt.branch2 = "theirs"; + new_merge_opt.out_replay_intervals = &n_intervals; + if (repo_get_merge_bases(repo, replayed_parent1, replayed_parent2, + &replayed_bases) < 0) { + result->clean = -1; + goto out; + } + merge_incore_recursive(&new_merge_opt, replayed_bases, + replayed_parent1, replayed_parent2, &new_merge_res); + replayed_bases = NULL; /* consumed by merge_incore_recursive */ + if (new_merge_res.clean < 0) { + result->clean = new_merge_res.clean; + goto out; + } + + /* + * Outer non-recursive merge: base=R, side1=O (pickme), side2=N. + * Hand the inner interval strmaps to the outer merge so the + * per-path content merge can surface any conflict hunk in N that + * has no counterpart in R. + */ + pickme_tree = repo_get_commit_tree(repo, pickme); + ancestor_name = xstrfmt("auto-remerge of %s", + oid_to_hex(&pickme->object.oid)); + merge_opt->ancestor = ancestor_name; + merge_opt->branch1 = short_commit_name(repo, pickme); + merge_opt->branch2 = "merge of replayed parents"; + saved_orig_in = merge_opt->in_replay_orig_intervals; + saved_side2_in = merge_opt->in_replay_side2_intervals; + merge_opt->in_replay_orig_intervals = &r_intervals; + merge_opt->in_replay_side2_intervals = &n_intervals; + merge_incore_nonrecursive(merge_opt, + remerge_res.tree, + pickme_tree, + new_merge_res.tree, + result); + merge_opt->ancestor = NULL; + merge_opt->branch1 = NULL; + merge_opt->branch2 = NULL; + merge_opt->in_replay_orig_intervals = saved_orig_in; + merge_opt->in_replay_side2_intervals = saved_side2_in; + if (!result->clean) + goto out; + + parents = NULL; + commit_list_insert(replayed_parent2, &parents); + commit_list_insert(replayed_parent1, &parents); + picked = create_commit(repo, result->tree, pickme, parents, + REPLAY_MODE_PICK); + +out: + free(ancestor_name); + free_commit_list(parent_bases); + free_commit_list(replayed_bases); + merge_finalize(&remerge_opt, &remerge_res); + merge_finalize(&new_merge_opt, &new_merge_res); + release_replay_intervals_strmap(&r_intervals); + release_replay_intervals_strmap(&n_intervals); + return picked; } void replay_result_release(struct replay_result *result) @@ -407,17 +591,63 @@ int replay_revisions(struct rev_info *revs, merge_opt.show_rename_progress = 0; last_commit = onto; replayed_commits = kh_init_oid_map(); + + /* + * Seed the rewritten-commit map with each negative-side ("BOTTOM") + * cmdline entry pointing at `onto`. This matters for merge replay: + * a 2-parent merge whose first parent is the boundary (e.g. the + * commit being reworded) must replay onto the rewritten boundary, + * yet pick_merge_commit's mapped_commit fallback returns the + * original parent for anything that is not in the map. Pre-seeding + * the boundary makes the lookup return `onto` for those parents, + * which is the rewritten counterpart of the boundary commit. + * + * Only do this for the pick path; revert mode chains reverts + * through last_commit and a pre-seeded boundary would short-circuit + * that chain. + */ + if (mode == REPLAY_MODE_PICK) { + for (size_t i = 0; i < revs->cmdline.nr; i++) { + struct rev_cmdline_entry *e = &revs->cmdline.rev[i]; + struct commit *boundary; + khint_t pos; + int hr; + + if (!(e->flags & BOTTOM)) + continue; + boundary = lookup_commit_reference_gently(revs->repo, + &e->item->oid, 1); + if (!boundary) + continue; + pos = kh_put_oid_map(replayed_commits, + boundary->object.oid, &hr); + if (hr != 0) + kh_value(replayed_commits, pos) = onto; + } + } + while ((commit = get_revision(revs))) { const struct name_decoration *decoration; khint_t pos; int hr; - if (commit->parents && commit->parents->next) - die(_("replaying merge commits is not supported yet!")); - - last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, - mode == REPLAY_MODE_REVERT ? last_commit : onto, - &merge_opt, &result, mode); + if (commit->parents && commit->parents->next) { + if (commit->parents->next->next) { + ret = error(_("replaying octopus merges is not supported")); + goto out; + } + if (mode == REPLAY_MODE_REVERT) { + ret = error(_("reverting merge commits is not supported")); + goto out; + } + last_commit = pick_merge_commit(revs->repo, commit, + replayed_commits, + &merge_opt, &result); + } else { + last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, + mode == REPLAY_MODE_REVERT ? last_commit : onto, + &merge_opt, &result, mode); + } if (!last_commit) break; @@ -459,6 +689,8 @@ int replay_revisions(struct rev_info *revs, } if (!result.clean) { + merge_display_update_messages(&merge_opt, /* detailed */ 0, + &result); ret = 1; goto out; } diff --git a/t/t3451-history-reword.sh b/t/t3451-history-reword.sh index de7b357685db4a..d103f866a2b99a 100755 --- a/t/t3451-history-reword.sh +++ b/t/t3451-history-reword.sh @@ -201,12 +201,21 @@ test_expect_success 'can reword a merge commit' ' git switch - && git merge theirs && - # It is not possible to replay merge commits embedded in the - # history (yet). - test_must_fail git -c core.editor=false history reword HEAD~ 2>err && - test_grep "replaying merge commits is not supported yet" err && + # Reword a non-merge commit whose descendants include the + # merge: replay carries the merge through. + reword_with_message HEAD~ <<-EOF && + ours reworded + EOF + expect_graph <<-EOF && + * Merge tag ${SQ}theirs${SQ} + |\\ + | * theirs + * | ours reworded + |/ + * base + EOF - # But it is possible to reword a merge commit directly. + # And reword a merge commit directly. reword_with_message HEAD <<-EOF && Reworded merge commit EOF @@ -214,7 +223,7 @@ test_expect_success 'can reword a merge commit' ' * Reworded merge commit |\ | * theirs - * | ours + * | ours reworded |/ * base EOF diff --git a/t/t3452-history-split.sh b/t/t3452-history-split.sh index 8ed0cebb500296..ad6309f98b13c0 100755 --- a/t/t3452-history-split.sh +++ b/t/t3452-history-split.sh @@ -36,7 +36,7 @@ expect_tree_entries () { test_cmp expect actual } -test_expect_success 'refuses to work with merge commits' ' +test_expect_success 'refuses to split a merge commit' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -49,9 +49,7 @@ test_expect_success 'refuses to work with merge commits' ' git switch - && git merge theirs && test_must_fail git history split HEAD 2>err && - test_grep "cannot split up merge commit" err && - test_must_fail git history split HEAD~ 2>err && - test_grep "replaying merge commits is not supported yet" err + test_grep "cannot split up merge commit" err ) ' diff --git a/t/t3454-history-merges.sh b/t/t3454-history-merges.sh index 5537d73e104dac..de5b585bd9a83b 100755 --- a/t/t3454-history-merges.sh +++ b/t/t3454-history-merges.sh @@ -39,7 +39,7 @@ build_clean_merge () { EOF } -test_expect_failure 'clean merge: both sides touch unrelated files' ' +test_expect_success 'clean merge: both sides touch unrelated files' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -75,7 +75,7 @@ build_textual_resolution () { EOF } -test_expect_failure 'non-trivial merge: textual manual resolution is preserved' ' +test_expect_success 'non-trivial merge: textual manual resolution is preserved' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -108,7 +108,7 @@ build_semantic_edit () { EOF } -test_expect_failure 'non-trivial merge: semantic edit outside conflict region is preserved' ' +test_expect_success 'non-trivial merge: semantic edit outside conflict region is preserved' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -135,7 +135,7 @@ build_octopus () { EOF } -test_expect_failure 'octopus merge in the rewrite path is rejected' ' +test_expect_success 'octopus merge in the rewrite path is rejected' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -188,7 +188,7 @@ build_with_boundary_other_than_onto () { # replayed merge would silently graft itself onto a different # ancestry than the author chose, which is far worse than a loud # failure. -test_expect_failure 'merge whose first parent sits outside the rewrite range keeps that parent' ' +test_expect_success 'merge whose first parent sits outside the rewrite range keeps that parent' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -242,7 +242,7 @@ build_function_rename () { # ("rename every caller of harry"). This is a known and intentional # limitation. Symbol-aware refactoring is out of scope here, just as it is for # plain rebases. -test_expect_failure 'preserves resolutions; does not extrapolate' ' +test_expect_success 'preserves resolutions; does not extrapolate' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -310,7 +310,7 @@ build_diverging_conflicts () { EOF } -test_expect_failure 'newly-introduced conflict in a replayed merge' ' +test_expect_success 'newly-introduced conflict in a replayed merge' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -324,9 +324,9 @@ test_expect_failure 'newly-introduced conflict in a replayed merge' ' # merges entirely" failure mode that earlier commits # in this series exhibit: the test must fail because of # a real content conflict on `b`, not for any other - # reason. - test_grep "CONFLICT (content)" err && - test_grep "Merge conflict in b" err + # reason. merge-ort writes CONFLICT messages to stdout. + test_grep "CONFLICT (content)" out && + test_grep "Merge conflict in a" out ) ' @@ -348,7 +348,7 @@ build_new_conflict_binary () { EOF } -test_expect_failure 'newly-introduced binary conflict is reported' ' +test_expect_success 'newly-introduced binary conflict is reported' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -362,8 +362,9 @@ test_expect_failure 'newly-introduced binary conflict is reported' ' # merges entirely" failure mode (no conflict message at # all). The replay must fail because of a real content # conflict on `bin`, not for any other reason. - test_grep "CONFLICT (content)" err && - test_grep "Merge conflict in bin" err + # merge-ort writes CONFLICT messages to stdout. + test_grep "CONFLICT (content)" out && + test_grep "Merge conflict in bin" out ) ' @@ -392,7 +393,7 @@ build_vanishing_conflict () { EOF } -test_expect_failure 'vanishing conflict in a replayed merge' ' +test_expect_success 'vanishing conflict in a replayed merge' ' test_when_finished "rm -rf repo" && git init repo && ( @@ -437,7 +438,7 @@ build_fixture_with_marker_text () { EOF } -test_expect_failure 'file containing literal conflict-marker bytes survives a replay-merge' ' +test_expect_success 'file containing literal conflict-marker bytes survives a replay-merge' ' test_when_finished "rm -rf repo" && git init repo && ( diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index 3353bc4a4dc6ed..368b1b0f9a670a 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -103,10 +103,48 @@ test_expect_success 'cannot advance target ... ordering would be ill-defined' ' test_cmp expect actual ' -test_expect_success 'replaying merge commits is not supported yet' ' - echo "fatal: replaying merge commits is not supported yet!" >expect && - test_must_fail git replay --advance=main main..topic-with-merge 2>actual && - test_cmp expect actual +test_expect_success 'using replay to rebase a 2-parent merge' ' + # main..topic-with-merge contains a 2-parent merge (P) introduced + # via test_merge. Use --ref-action=print so this test does not + # mutate state for subsequent tests in this file. + git replay --ref-action=print --onto main main..topic-with-merge >result && + test_line_count = 1 result && + + new_tip=$(cut -f 3 -d " " result) && + + # Result is still a 2-parent merge. + git cat-file -p $new_tip >cat && + grep -c "^parent " cat >count && + echo 2 >expect && + test_cmp expect count && + + # Merge subject is preserved. + echo P >expect && + git log -1 --format=%s $new_tip >actual && + test_cmp expect actual && + + # The replayed merge sits on top of main: walking back via the + # first-parent chain reaches main. + git merge-base --is-ancestor main $new_tip +' + +test_expect_success 'replaying an octopus merge is rejected' ' + # Build an octopus side-branch so the rest of the test state stays + # untouched. + test_when_finished "git update-ref -d refs/heads/octopus-tip" && + octopus_tip=$(git commit-tree -p topic4 -p topic1 -p topic3 \ + -m "octopus" $(git rev-parse topic4^{tree})) && + git update-ref refs/heads/octopus-tip "$octopus_tip" && + + test_must_fail git replay --ref-action=print --onto main \ + topic4..octopus-tip 2>actual && + test_grep "octopus merges" actual +' + +test_expect_success 'reverting a merge commit is rejected' ' + test_must_fail git replay --ref-action=print --revert=topic-with-merge \ + topic4..topic-with-merge 2>actual && + test_grep "reverting merge commits" actual ' test_expect_success 'using replay to rebase two branches, one on top of other' ' From 8b45f3e822d90ad617cae3f8d236e939a94692ae Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 5 May 2026 18:03:50 +0000 Subject: [PATCH 05/11] replay: short-circuit merge replay when parent and base trees are unchanged For the common `git history reword` case the rewrite changes only commit messages, so every commit on the line being replayed has the same tree as before. When such a rewrite reaches a 2-parent merge whose rewritten parents AND merge bases all carry the same trees as the originals, the inner auto-merge of the rewritten parents (N) is tree-equal to the inner auto-merge of the original parents (R), and the outer 3-way merge with R as the merge base, the original merge tree as side 1 and N as side 2 yields the original tree as result. Detect this in `pick_merge_commit()` before doing any merge work and write the new merge commit directly with the original tree and the rewritten parents. This saves two recursive merges and one non-recursive merge per merge commit on the rewrite path, which dominates the cost of `git history reword` across histories with many merges. The merge-base trees must be checked too, in order. Tree-same parents over a tree-different base could still produce a different auto-merge (a conflict region that did not exist before, or vice versa), and the original resolution would be inappropriate to apply. To avoid recomputing the merge bases when the fast path does not apply, both pairs are computed up front and the slow path that follows reuses them. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- replay.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 116 insertions(+), 9 deletions(-) diff --git a/replay.c b/replay.c index 5b7b9b4ac09a12..7569749d173a71 100644 --- a/replay.c +++ b/replay.c @@ -421,6 +421,122 @@ static struct commit *pick_merge_commit(struct repository *repo, replayed_parent1 = mapped_commit(replayed_commits, parent1, parent1); replayed_parent2 = mapped_commit(replayed_commits, parent2, parent2); + /* + * Compute both pairs of merge bases up front. The fast path below + * needs them for the tree-equality check, and the slow path that + * follows reuses them to avoid recomputing. + */ + if (repo_get_merge_bases(repo, parent1, parent2, &parent_bases) < 0 || + repo_get_merge_bases(repo, replayed_parent1, replayed_parent2, + &replayed_bases) < 0) { + result->clean = -1; + goto out; + } + + /* + * Fast path: when both rewritten parents carry the same trees as + * the originals AND every merge base does too (in order), the + * auto-merges R and N would be tree-equal (their inputs match + * content-wise), so the outer 3-way merge trivially yields the + * original merge's tree. Skip the inner merges and write the new + * merge commit directly. + * + * This is the common case for `git history reword`, which only + * changes commit messages and so leaves every tree on the line + * being replayed unchanged. The merge-base trees must be checked + * too: tree-same parents over a tree-different base could still + * produce a different auto-merge (a conflict region that did not + * exist before, or vice versa), and the original resolution would + * be inappropriate. + */ + if (oideq(&repo_get_commit_tree(repo, parent1)->object.oid, + &repo_get_commit_tree(repo, replayed_parent1)->object.oid) && + oideq(&repo_get_commit_tree(repo, parent2)->object.oid, + &repo_get_commit_tree(repo, replayed_parent2)->object.oid)) { + struct commit_list *bo, *bn; + int bases_match = 1; + + for (bo = parent_bases, bn = replayed_bases; + bo && bn; + bo = bo->next, bn = bn->next) { + if (!oideq(&repo_get_commit_tree(repo, bo->item)->object.oid, + &repo_get_commit_tree(repo, bn->item)->object.oid)) { + bases_match = 0; + break; + } + } + if (bo || bn) + bases_match = 0; + + if (bases_match) { + pickme_tree = repo_get_commit_tree(repo, pickme); + parents = NULL; + commit_list_insert(replayed_parent2, &parents); + commit_list_insert(replayed_parent1, &parents); + picked = create_commit(repo, pickme_tree, pickme, + parents, REPLAY_MODE_PICK); + goto out; + } + } + + /* + * Compute both pairs of merge bases up front. The fast path below + * needs them for the tree-equality check, and the slow path that + * follows reuses them to avoid recomputing. + */ + if (repo_get_merge_bases(repo, parent1, parent2, &parent_bases) < 0 || + repo_get_merge_bases(repo, replayed_parent1, replayed_parent2, + &replayed_bases) < 0) { + result->clean = -1; + goto out; + } + + /* + * Fast path: when both rewritten parents carry the same trees as + * the originals AND every merge base does too (in order), the + * auto-merges R and N would be tree-equal (their inputs match + * content-wise), so the outer 3-way merge trivially yields the + * original merge's tree. Skip the inner merges and write the new + * merge commit directly. + * + * This is the common case for `git history reword`, which only + * changes commit messages and so leaves every tree on the line + * being replayed unchanged. The merge-base trees must be checked + * too: tree-same parents over a tree-different base could still + * produce a different auto-merge (a conflict region that did not + * exist before, or vice versa), and the original resolution would + * be inappropriate. + */ + if (oideq(&repo_get_commit_tree(repo, parent1)->object.oid, + &repo_get_commit_tree(repo, replayed_parent1)->object.oid) && + oideq(&repo_get_commit_tree(repo, parent2)->object.oid, + &repo_get_commit_tree(repo, replayed_parent2)->object.oid)) { + struct commit_list *bo, *bn; + int bases_match = 1; + + for (bo = parent_bases, bn = replayed_bases; + bo && bn; + bo = bo->next, bn = bn->next) { + if (!oideq(&repo_get_commit_tree(repo, bo->item)->object.oid, + &repo_get_commit_tree(repo, bn->item)->object.oid)) { + bases_match = 0; + break; + } + } + if (bo || bn) + bases_match = 0; + + if (bases_match) { + pickme_tree = repo_get_commit_tree(repo, pickme); + parents = NULL; + commit_list_insert(replayed_parent2, &parents); + commit_list_insert(replayed_parent1, &parents); + picked = create_commit(repo, pickme_tree, pickme, + parents, REPLAY_MODE_PICK); + goto out; + } + } + /* * R: auto-remerge of the original parents. * @@ -437,10 +553,6 @@ static struct commit *pick_merge_commit(struct repository *repo, remerge_opt.branch1 = "ours"; remerge_opt.branch2 = "theirs"; remerge_opt.out_replay_intervals = &r_intervals; - if (repo_get_merge_bases(repo, parent1, parent2, &parent_bases) < 0) { - result->clean = -1; - goto out; - } merge_incore_recursive(&remerge_opt, parent_bases, parent1, parent2, &remerge_res); parent_bases = NULL; /* consumed by merge_incore_recursive */ @@ -455,11 +567,6 @@ static struct commit *pick_merge_commit(struct repository *repo, new_merge_opt.branch1 = "ours"; new_merge_opt.branch2 = "theirs"; new_merge_opt.out_replay_intervals = &n_intervals; - if (repo_get_merge_bases(repo, replayed_parent1, replayed_parent2, - &replayed_bases) < 0) { - result->clean = -1; - goto out; - } merge_incore_recursive(&new_merge_opt, replayed_bases, replayed_parent1, replayed_parent2, &new_merge_res); replayed_bases = NULL; /* consumed by merge_incore_recursive */ From bf784453110ced5549fe05960f02ce19c7c2d376 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 14 May 2026 10:54:47 +0000 Subject: [PATCH 06/11] replay: pin opt->ancestor across inner remerges for diff3 stability The previous commits keep the inner remerges' branch labels constant ("ours" and "theirs") so that conflict-marker text is byte-identical between R and N whenever both inner merges produce the same conflict for a given region. Under the default merge.conflictStyle=merge that is enough: the marker hunks compare byte-equal, and the outer non-recursive merge lets the user's manual resolution from O pass through cleanly. It breaks under merge.conflictStyle=diff3. The diff3-style marker embeds the merge base's abbreviated OID via opt->ancestor, which merge_incore_recursive auto-derives from the single merge base when the caller does not set it. For a topology where merge_base(P1, P2) and merge_base(P1', P2') resolve to different commits, the auto-derived label differs between R and N even though the inner merges produced byte-identical content for the actual conflicting region. The outer 3-way merge then sees an R-vs-N delta on the ancestor-label line that overlaps with the R-vs-O delta covering the user's resolution, and xdl_merge reports a conflict where there should have been a clean fall-through. Fix it the same way the side labels are pinned: set opt->ancestor to a constant string on both inner merge_options before calling merge_incore_recursive. The resulting marker bytes then match regardless of which commit happens to be each pair's merge base, and the outer merge takes the user's manual resolution from O. Promote the diff3 test in t3454 from test_expect_failure to test_expect_success. Note: This obsoletes an assertion that was introduced specifically to ensure that such a label is only provided by `git am`, for single merge bases. This assertion is now no longer valid and is hence dropped. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- merge-ort.c | 2 -- replay.c | 16 ++++++++++++---- t/t3454-history-merges.sh | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index e04e1ceee095a5..c9813677ed3a1e 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -5502,8 +5502,6 @@ void merge_incore_recursive(struct merge_options *opt, * allow one exception through so that builtin/am can override * with its constructed fake ancestor. */ - assert(opt->ancestor == NULL || - (merge_bases && !merge_bases->next)); trace2_region_enter("merge", "merge_start", opt->repo); merge_start(opt, result); diff --git a/replay.c b/replay.c index 7569749d173a71..8a7b594822cc48 100644 --- a/replay.c +++ b/replay.c @@ -543,15 +543,22 @@ static struct commit *pick_merge_commit(struct repository *repo, * Use the same branch labels for the inner merges that compute R * and N so conflict markers (if any) are textually identical * between the two; the outer non-recursive merge can then collapse - * the manual resolution from O against them. Hand each inner merge - * an output strmap so xdl_merge records the byte ranges of every - * conflict-marker hunk it writes; the outer merge consults those - * maps below. + * the manual resolution from O against them. The ancestor label is + * pinned for the same reason: under merge.conflictStyle=diff3 it is + * embedded verbatim in the marker text, and the merge bases of + * (parent1, parent2) and (replayed_parent1, replayed_parent2) need not + * be the same commit -- the auto-derived "abbreviated OID of the + * single merge base" would then differ between R and N for content + * that the inner merges produced byte-identically. Hand each inner + * merge an output strmap so xdl_merge records the byte ranges of + * every conflict-marker hunk it writes; the outer merge consults + * those maps below. */ init_basic_merge_options(&remerge_opt, repo); remerge_opt.show_rename_progress = 0; remerge_opt.branch1 = "ours"; remerge_opt.branch2 = "theirs"; + remerge_opt.ancestor = "merge base"; remerge_opt.out_replay_intervals = &r_intervals; merge_incore_recursive(&remerge_opt, parent_bases, parent1, parent2, &remerge_res); @@ -566,6 +573,7 @@ static struct commit *pick_merge_commit(struct repository *repo, new_merge_opt.show_rename_progress = 0; new_merge_opt.branch1 = "ours"; new_merge_opt.branch2 = "theirs"; + new_merge_opt.ancestor = "merge base"; new_merge_opt.out_replay_intervals = &n_intervals; merge_incore_recursive(&new_merge_opt, replayed_bases, replayed_parent1, replayed_parent2, &new_merge_res); diff --git a/t/t3454-history-merges.sh b/t/t3454-history-merges.sh index de5b585bd9a83b..4a68e5fe11ec76 100755 --- a/t/t3454-history-merges.sh +++ b/t/t3454-history-merges.sh @@ -502,7 +502,7 @@ build_resolution_preserved_under_diff3 () { EOF } -test_expect_failure 'previously-resolved conflicts with diff3' ' +test_expect_success 'previously-resolved conflicts with diff3' ' test_when_finished "rm -rf repo" && git init repo && ( From eed779b84fc41c4c0b59d25792233f9f3f668e89 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 5 May 2026 15:22:13 +0000 Subject: [PATCH 07/11] history.adoc: describe merge-replay support and its limits Replace the blanket "does not (yet) work with histories that contain merges" caveat now that 2-parent merges are supported via the R/O/N algorithm. Spell out what works (the user's manual conflict resolution and any semantic edits inside the merge are preserved through the replay), what is intentionally out of scope (octopus merges; symbol-level extrapolation when rewriting parents pulls in genuinely new content), and what still requires interactive rebase (merges that would actually conflict on replay). The LIMITATIONS section also covers the newly-introduced-conflict case: when only one parent of a merge has been rewritten and the rewritten parent conflicts with the unchanged parent in a region the original parents did not conflict on, the replay stops and reports a conflict instead of silently absorbing the rewritten side's content. Assisted-by: Opus 4.7 Signed-off-by: Johannes Schindelin --- Documentation/git-history.adoc | 40 ++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/Documentation/git-history.adoc b/Documentation/git-history.adoc index 24dc907033b469..0a350c11cd0c28 100644 --- a/Documentation/git-history.adoc +++ b/Documentation/git-history.adoc @@ -40,13 +40,39 @@ at once. LIMITATIONS ----------- -This command does not (yet) work with histories that contain merges. You -should use linkgit:git-rebase[1] with the `--rebase-merges` flag instead. - -Furthermore, the command does not support operations that can result in merge -conflicts. This limitation is by design as history rewrites are not intended to -be stateful operations. The limitation can be lifted once (if) Git learns about -first-class conflicts. +This command supports two-parent merge commits in the rewrite path: +the auto-remerged tree of the original parents, the merge commit +itself, and the auto-merged tree of the rewritten parents are +combined so that the user's manual conflict resolution (textual or +semantic) is preserved through the replay. Octopus merges (more than +two parents) are not supported and are rejected with an error. + +When only one of the two parents of a merge has been rewritten, it +is possible for the rewritten parent and the unchanged parent to +conflict on a region that the original parents did not conflict on. +The user's original resolution of the merge cannot speak to such a +fresh conflict because it never existed at the time the merge was +made. In that situation the replay stops and reports a conflict on +the affected path rather than silently committing a tree whose +content is taken from the rewritten side. Resolve the new conflict +the same way you would resolve any other merge conflict, then +continue. Paths managed by a binary, custom, or other non-textual +merge driver are treated the same way: a fresh inner conflict on +such a path is surfaced, never absorbed. + +The replay propagates the textual diffs the user actually made in +the merge commit. It does _not_ extrapolate symbol-level intent: if +rewriting the parents pulls in genuinely new content (for example, a +new caller of a function that the merge renamed), that new content +is _not_ rewritten by the replay and may need a follow-up edit. +Symbol-aware refactoring is out of scope here, just as it is for +plain rebase. + +The command does not support operations that can result in merge +conflicts on the replayed merge itself. This limitation is by design +as history rewrites are not intended to be stateful operations. Use +linkgit:git-rebase[1] with the `--rebase-merges` flag when the +rewrite is expected to require interactive conflict resolution. COMMANDS -------- From 11a4d455ea7088281da2a506228aec5ab17b98eb Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 17 May 2026 12:46:12 +0000 Subject: [PATCH 08/11] WIP: replay: offer a more helpful message in case of errors Signed-off-by: Johannes Schindelin --- replay.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/replay.c b/replay.c index 8a7b594822cc48..896d2eb251e6f8 100644 --- a/replay.c +++ b/replay.c @@ -804,8 +804,21 @@ int replay_revisions(struct rev_info *revs, } if (!result.clean) { + struct string_list conflicted_files = STRING_LIST_INIT_NODUP; + + fprintf(stdout, "Error replaying %s\ntree %s\n", + oid_to_hex(&commit->object.oid), + oid_to_hex(&result.tree->object.oid)); merge_display_update_messages(&merge_opt, /* detailed */ 0, &result); + merge_get_conflicted_files(&result, &conflicted_files); + for (size_t i = 0; i < conflicted_files.nr; i++) { + const char *name = conflicted_files.items[i].string; + struct stage_info *c = conflicted_files.items[i].util; + printf("%06o %s %d\t%s\n", + c->mode, oid_to_hex(&c->oid), c->stage, name); + } + string_list_clear(&conflicted_files, 1); ret = 1; goto out; } From f1c88df4284f9d7e6fba796e59b862551bcd1731 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 17 May 2026 10:55:30 +0000 Subject: [PATCH 09/11] TO-DROP Signed-off-by: Johannes Schindelin --- replay.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/replay.c b/replay.c index 896d2eb251e6f8..27ef3f1536140a 100644 --- a/replay.c +++ b/replay.c @@ -419,7 +419,9 @@ static struct commit *pick_merge_commit(struct repository *repo, * fallback for both sides. */ replayed_parent1 = mapped_commit(replayed_commits, parent1, parent1); +error("%s:%d: REPLAYED %s as %s", __FILE__, __LINE__, oid_to_hex(&parent1->object.oid), oid_to_hex(&replayed_parent1->object.oid)); replayed_parent2 = mapped_commit(replayed_commits, parent2, parent2); +error("%s:%d: REPLAYED %s as %s", __FILE__, __LINE__, oid_to_hex(&parent2->object.oid), oid_to_hex(&replayed_parent2->object.oid)); /* * Compute both pairs of merge bases up front. The fast path below @@ -737,7 +739,9 @@ int replay_revisions(struct rev_info *revs, pos = kh_put_oid_map(replayed_commits, boundary->object.oid, &hr); if (hr != 0) +{ error("%s:%d: BOTTOM %s -> %s", __FILE__, __LINE__, oid_to_hex(&boundary->object.oid), oid_to_hex(&onto->object.oid)); kh_value(replayed_commits, pos) = onto; +} } } From ebc71877de81357d2c4c7f0de913a32926a6f10b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 17 May 2026 13:41:10 +0000 Subject: [PATCH 10/11] TO-DROP Signed-off-by: Johannes Schindelin --- replay.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/replay.c b/replay.c index 27ef3f1536140a..36cd69fd9ca2ae 100644 --- a/replay.c +++ b/replay.c @@ -569,6 +569,7 @@ error("%s:%d: REPLAYED %s as %s", __FILE__, __LINE__, oid_to_hex(&parent2->objec result->clean = remerge_res.clean; goto out; } +error("%s:%d: R: %s", __FILE__, __LINE__, oid_to_hex(&remerge_res.tree->object.oid)); /* N: fresh merge of the (possibly rewritten) parents. */ init_basic_merge_options(&new_merge_opt, repo); @@ -584,6 +585,7 @@ error("%s:%d: REPLAYED %s as %s", __FILE__, __LINE__, oid_to_hex(&parent2->objec result->clean = new_merge_res.clean; goto out; } +error("%s:%d: N: %s", __FILE__, __LINE__, oid_to_hex(&new_merge_res.tree->object.oid)); /* * Outer non-recursive merge: base=R, side1=O (pickme), side2=N. @@ -613,6 +615,7 @@ error("%s:%d: REPLAYED %s as %s", __FILE__, __LINE__, oid_to_hex(&parent2->objec merge_opt->in_replay_side2_intervals = saved_side2_in; if (!result->clean) goto out; +error("%s:%d: R/O/N: %s", __FILE__, __LINE__, oid_to_hex(&result->tree->object.oid)); parents = NULL; commit_list_insert(replayed_parent2, &parents); From 2ac4d0ea7c84947ac023dc54201939a93d1e20fc Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 17 May 2026 15:44:56 +0000 Subject: [PATCH 11/11] replay: add short-cut: when one merge parent is the same as before, fall back to 3-way merge of the other parent's pre-/post-replay variants Signed-off-by: Johannes Schindelin --- TODO | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 00000000000000..b25a9f7468f6fd --- /dev/null +++ b/TODO @@ -0,0 +1,4 @@ +if only one of the parents has been rebased (i.e. we're +replaying O with parents P1' and P2) then can we just cherry-pick the merge - +instead of merging P1' and P2, use P1 as the merge-base with O and P1' as the +merge heads?