Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,9 @@ private static int calculateOffset(List<String> contentLines, TSPoint point) {
for (int i = 0; i < startRow; i++) {
// Each line in contentLines (except maybe the last) was terminated by LF (\n).
// If the original was CRLF, the CR (\r) is still at the end of the line string.
// .getBytes().length + 1 correctly counts [LineContent] + [LF].
offset += contentLines.get(i).getBytes(StandardCharsets.UTF_8).length + 1;
// The offset must be a char (UTF-16 code unit) index, matching how consumers (e.g.
// VanillaDiffHtmlBuilder) walk the source, so use String.length() rather than the
// UTF-8 byte length. The + 1 accounts for the LF that terminated each line.
offset += contentLines.get(i).length() + 1;
}
offset += startColumn;
return offset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,17 @@ public void testCrlfOffsetConsistency() throws IOException {
@Test
public void testMultiByteOffsetConsistency() throws IOException {
// Line 1: "# 🐍\n"
// '#' (1) + ' ' (1) + '🐍' (4 bytes in UTF-8) + '\n' (1) = 7 bytes total
// '#' (1) + ' ' (1) + '🐍' (2 UTF-16 chars, surrogate pair) + '\n' (1) = 5 chars total
// Line 2: "x = 1"
// Offsets must be char-based (UTF-16 code units) to match how the rest of GumTree
// (e.g. AbstractJdtVisitor, VanillaDiffHtmlBuilder) indexes source text, not UTF-8 bytes.
String content = "# 🐍\nx = 1";
TreeContext ctx = generator.generateFrom().string(content);

Tree xAssignment = ctx.getRoot().getChild(1);
assertEquals("expression_statement", xAssignment.getType().name);
assertEquals(7, xAssignment.getPos(), "Line 2 should start at byte offset 7 after a 4-byte emoji and LF");
assertEquals(5, xAssignment.getPos(),
"Line 2 should start at char offset 5 after a surrogate-pair emoji and LF");
}

@Test
Expand Down