diff --git a/src/main/java/com/hubspot/jinjava/Jinjava.java b/src/main/java/com/hubspot/jinjava/Jinjava.java index a5d834b48..f3ac8a4fb 100644 --- a/src/main/java/com/hubspot/jinjava/Jinjava.java +++ b/src/main/java/com/hubspot/jinjava/Jinjava.java @@ -250,7 +250,7 @@ public RenderResult renderForResult( .getInterpreterFactory() .newInstance(this, context, renderConfig); try { - String result = interpreter.render(template); + String result = stripTrailingNewlineIfNeeded(interpreter.render(template), renderConfig); return new RenderResult( result, interpreter.getContext(), @@ -293,6 +293,18 @@ public RenderResult renderForResult( } } + /** + * Strips a single trailing newline from {@code output} when + * {@code keepTrailingNewline} is {@code false} in {@code config} (the config of the + * current render, not necessarily the global one), matching Python Jinja2's default. + */ + private String stripTrailingNewlineIfNeeded(String output, JinjavaConfig config) { + if (!config.isKeepTrailingNewline() && output.endsWith("\n")) { + return output.substring(0, output.length() - 1); + } + return output; + } + /** * Creates a new interpreter instance using the global context and global config * diff --git a/src/main/java/com/hubspot/jinjava/JinjavaConfig.java b/src/main/java/com/hubspot/jinjava/JinjavaConfig.java index ccfaf7ca5..f128cb9e5 100644 --- a/src/main/java/com/hubspot/jinjava/JinjavaConfig.java +++ b/src/main/java/com/hubspot/jinjava/JinjavaConfig.java @@ -203,6 +203,17 @@ default boolean isEnableFilterChainOptimization() { return false; } + /** + * When {@code false} (default), a single trailing newline is stripped from the rendered + * output, matching Python Jinja2's default. + * When {@code true}, the trailing newline of + * the rendered output is preserved — matching Jinjava's historical behaviour. 
+ */ + @Value.Default + default boolean isKeepTrailingNewline() { + return false; + } + @Value.Default default ObjectMapper getObjectMapper() { ObjectMapper objectMapper = new ObjectMapper().registerModule(new Jdk8Module()); diff --git a/src/main/java/com/hubspot/jinjava/LegacyOverrides.java b/src/main/java/com/hubspot/jinjava/LegacyOverrides.java index bd3732455..ba9278127 100644 --- a/src/main/java/com/hubspot/jinjava/LegacyOverrides.java +++ b/src/main/java/com/hubspot/jinjava/LegacyOverrides.java @@ -21,6 +21,7 @@ public interface LegacyOverrides extends WithLegacyOverrides { .withAllowAdjacentTextNodes(true) .withUseTrimmingForNotesAndExpressions(true) .withKeepNullableLoopValues(true) + .withHandleBackslashInQuotesOnly(true) .build(); LegacyOverrides ALL = new Builder() .withEvaluateMapKeys(true) @@ -32,6 +33,7 @@ public interface LegacyOverrides extends WithLegacyOverrides { .withAllowAdjacentTextNodes(true) .withUseTrimmingForNotesAndExpressions(true) .withKeepNullableLoopValues(true) + .withHandleBackslashInQuotesOnly(true) .build(); @Value.Default @@ -79,6 +81,23 @@ default boolean isKeepNullableLoopValues() { return false; } + /** + * When {@code true}, the token scanner treats backslash as an escape character + * only inside quoted string literals, leaving bare backslashes outside quotes + * untouched for the expression parser (JUEL) to handle. This matches the + * behaviour of Python's Jinja2, where the template scanner is not responsible + * for backslash interpretation at all. + * + *
When {@code false} (the default), the scanner consumes a backslash and
+ * the following character unconditionally, regardless of quote context. This
+ * is the legacy Jinjava behaviour, which prevents closing delimiters from
+ * being recognized after a backslash but diverges from Jinja2.
+ */
+ @Value.Default
+ default boolean isHandleBackslashInQuotesOnly() {
+ return false; // off by default: the legacy unconditional-backslash scanning stays in effect
+ }
+
class Builder extends ImmutableLegacyOverrides.Builder {}
static Builder newBuilder() {
diff --git a/src/main/java/com/hubspot/jinjava/tree/TreeParser.java b/src/main/java/com/hubspot/jinjava/tree/TreeParser.java
index 56f11003c..fedadbaf9 100644
--- a/src/main/java/com/hubspot/jinjava/tree/TreeParser.java
+++ b/src/main/java/com/hubspot/jinjava/tree/TreeParser.java
@@ -32,6 +32,7 @@
import com.hubspot.jinjava.lib.tag.FlexibleTag;
import com.hubspot.jinjava.lib.tag.Tag;
import com.hubspot.jinjava.tree.parse.ExpressionToken;
+import com.hubspot.jinjava.tree.parse.StringTokenScanner;
import com.hubspot.jinjava.tree.parse.TagToken;
import com.hubspot.jinjava.tree.parse.TextToken;
import com.hubspot.jinjava.tree.parse.Token;
@@ -39,6 +40,7 @@
import com.hubspot.jinjava.tree.parse.TokenScannerSymbols;
import com.hubspot.jinjava.tree.parse.UnclosedToken;
import com.hubspot.jinjava.tree.parse.WhitespaceControlParser;
+import java.util.Iterator;
import org.apache.commons.lang3.StringUtils;
public class TreeParser {
@@ -52,7 +54,7 @@ public class TreeParser {
public TreeParser(JinjavaInterpreter interpreter, String input) {
this.scanner =
- Iterators.peekingIterator(new TokenScanner(input, interpreter.getConfig()));
+ Iterators.peekingIterator(createScanner(input, interpreter.getConfig()));
this.interpreter = interpreter;
this.symbols = interpreter.getConfig().getTokenScannerSymbols();
this.whitespaceControlParser =
@@ -104,6 +106,13 @@ public Node buildTree() {
return root;
}
+ private static Iterator Unlike the character-based {@link TokenScanner}, this scanner matches
+ * multi-character delimiter strings directly (e.g. <code>\VAR{</code> / <code>}</code>,
+ * <code>\BLOCK{</code> / <code>}</code>) without relying on a shared prefix character. It also
+ * supports optional {@link TokenScannerSymbols#getLineStatementPrefix() line statement}
+ * and {@link TokenScannerSymbols#getLineCommentPrefix() line comment} prefixes,
+ * matching Python Jinja2 semantics.
+ *
+ * {@link TreeParser} selects this scanner automatically when
+ * {@code symbols.isStringBased()} is {@code true}; callers never instantiate it
+ * directly.
+ */
+public class StringTokenScanner extends AbstractIterator Line comments match anywhere on a line (not just at the start).
+ * For mid-line comments, everything from the prefix to end of line is
+ * stripped; the text before the prefix on the same line is kept.
+ *
+ * Confirmed Python Jinja2 semantics:
+ * Python Jinja2 semantics for {@code %#-}: strip back through any preceding
+ * blank lines AND the {@code \n} that ends the last real-content line, so that
+ * the comment's own kept {@code \n} becomes the sole separator. Stops at
+ * {@code tokenStart} so that {@code \n}s produced by preceding line statements
+ * or plain {@code %#} comments are not consumed.
+ *
+ * Examples (| marks the flush boundary):
+ * Unlike {@link DefaultTokenScannerSymbols}, which is constrained to single-character
+ * prefixes and postfixes, this class allows any non-empty string for each of the six
+ * delimiter roles. The delimiters do not need to share a common prefix character.
+ *
+ * {@code TreeParser} detects this class via {@link #isStringBased()} and switches to the
+ * string-matching {@link StringTokenScanner}. {@link ExpressionToken}, {@link TagToken}, and
+ * {@link NoteToken} use the length accessors on {@link TokenScannerSymbols} (e.g.
+ * {@link #getExpressionStartLength()}) to strip delimiters correctly regardless of length.
+ *
+ * The single-character abstract methods inherited from {@link TokenScannerSymbols}
+ * return private Unicode Private-Use-Area sentinel values. These are used only as
+ * token-kind discriminators inside {@link Token#newToken} and must never be used for
+ * scanning template text.
+ *
+ * When {@link TokenScannerSymbols#isStringBased()} is {@code true},
+ * {@link TreeParser} uses {@link StringTokenScanner} instead.
+ */
public class TokenScanner extends AbstractIterator The default returns {@code false}, so all existing subclasses are unaffected.
+ * {@link StringTokenScannerSymbols} overrides this to return {@code true}.
+ */
+ public boolean isStringBased() {
+ return false; // char-based scanning unless a subclass overrides this
+ }
+
+ /**
+ * Length of the variable/expression opening delimiter (e.g. 2 for <code>"{{"</code>),
+ * used by {@link ExpressionToken#parse()} instead of the hardcoded constant 2.
+ */
+ public int getExpressionStartLength() {
+ return getExpressionStart().length();
+ }
+
+ /**
+ * Length of the variable/expression closing delimiter (e.g. 2 for <code>"}}"</code>),
+ * used by {@link ExpressionToken#parse()} instead of the hardcoded constant 2.
+ */
+ public int getExpressionEndLength() {
+ return getExpressionEnd().length();
+ }
+
+ /**
+ * Length of the block/tag opening delimiter (e.g. 2 for <code>"{%"</code>),
+ * used by {@link TagToken#parse()} instead of the hardcoded constant 2.
+ */
+ public int getTagStartLength() {
+ return getExpressionStartWithTag().length();
+ }
+
+ /**
+ * Length of the block/tag closing delimiter (e.g. 2 for <code>"%}"</code>),
+ * used by {@link TagToken#parse()} instead of the hardcoded constant 2.
+ */
+ public int getTagEndLength() {
+ return getExpressionEndWithTag().length();
+ }
+
+ /**
+ * Length of the comment opening delimiter (e.g. 2 for <code>"{#"</code>),
+ * used by {@link NoteToken#parse()} instead of the hardcoded constant 2.
+ */
+ public int getCommentStartLength() {
+ return getOpeningComment().length();
+ }
+
+ /**
+ * Length of the comment closing delimiter (e.g. 2 for <code>"#}"</code>),
+ * used by {@link NoteToken#parse()} instead of the hardcoded constant 2.
+ */
+ public int getCommentEndLength() {
+ return getClosingComment().length();
+ }
+
+ /**
+ * Optional line statement prefix (e.g. {@code "%%"}). When non-null, any line
+ * that begins with this prefix (after optional horizontal whitespace) is treated
+ * as a block tag statement, equivalent to wrapping its content in the block
+ * delimiters. Returns {@code null} by default (feature disabled).
+ *
+ * <p>Only used by {@link StringTokenScannerSymbols}; has no effect in the
+ * char-based path.
+ */
+ public String getLineStatementPrefix() {
+ return null;
+ }
+
+ /**
+ * Optional line comment prefix (e.g. {@code "%#"}). When non-null, everything from
+ * this prefix to the end of its line is dropped from the output; the line's trailing
+ * newline is kept. Returns {@code null} by default (feature disabled).
+ *
+ * <p>Only used by {@link StringTokenScannerSymbols}; has no effect in the
+ * char-based path.
+ */
+ public String getLineCommentPrefix() {
+ return null;
+ }
}
diff --git a/src/test/java/com/hubspot/jinjava/TrailingNewlineTest.java b/src/test/java/com/hubspot/jinjava/TrailingNewlineTest.java
new file mode 100644
index 000000000..9d43062d5
--- /dev/null
+++ b/src/test/java/com/hubspot/jinjava/TrailingNewlineTest.java
@@ -0,0 +1,73 @@
+package com.hubspot.jinjava;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import com.google.common.collect.ImmutableMap;
+import com.hubspot.jinjava.Jinjava;
+import java.util.HashMap;
+import org.junit.Test;
+
+public class TrailingNewlineTest {
+
+ private static final String TEMPLATE_WITH_TRAILING_NEWLINE = "hello\n";
+ private static final String TEMPLATE_WITHOUT_TRAILING_NEWLINE = "hello";
+ private static final String TEMPLATE_MULTIPLE_TRAILING_NEWLINES = "hello\n\n";
+
+ // ── keepTrailingNewline=true (historical Jinjava behaviour: preserve \n) ────
+
+ @Test
+ public void itKeepsTrailingNewlineIsTrue() {
+ Jinjava jinjava = new Jinjava(
+ JinjavaConfig.newBuilder().withKeepTrailingNewline(true).build()
+ );
+ assertThat(jinjava.render(TEMPLATE_WITH_TRAILING_NEWLINE, new HashMap<>()))
+ .isEqualTo("hello\n");
+ }
+
+ // ── keepTrailingNewline=false (default, Python-compatible: strip trailing \n) ─
+
+ @Test
+ public void itStripsTrailingNewlineDefault() {
+ // Defaults keepTrailingNewline=false (matching Python behaviour)
+ Jinjava jinjava = new Jinjava();
+ assertThat(jinjava.render(TEMPLATE_WITH_TRAILING_NEWLINE, new HashMap<>()))
+ .isEqualTo("hello");
+ }
+
+ @Test
+ public void itStripsTrailingNewlineIsFalse() {
+ Jinjava jinjava = new Jinjava(
+ JinjavaConfig.newBuilder().withKeepTrailingNewline(false).build()
+ );
+
+ assertThat(jinjava.render(TEMPLATE_WITH_TRAILING_NEWLINE, new HashMap<>()))
+ .isEqualTo("hello");
+ }
+
+ // ── Edge cases ──────────────────────────────────────────────────────────────
+
+ @Test
+ public void itDoesNotAffectOutputWithNoTrailingNewline() {
+ // Run with stripping enabled (the default): with keepTrailingNewline=true the
+ // strip path is never entered, which would make this check vacuous.
+ Jinjava jinjava = new Jinjava();
+
+ assertThat(jinjava.render(TEMPLATE_WITHOUT_TRAILING_NEWLINE, new HashMap<>()))
+ .isEqualTo("hello");
+ }
+
+ @Test
+ public void itStripsOnlyOneTrailingNewlineNotMultiple() {
+ // Python only strips a single trailing newline, not all of them.
+ Jinjava jinjava = new Jinjava();
+ assertThat(jinjava.render(TEMPLATE_MULTIPLE_TRAILING_NEWLINES, new HashMap<>()))
+ .isEqualTo("hello\n");
+ }
+
+ @Test
+ public void itStripsTrailingNewlineFromRenderedExpressions() {
+ Jinjava jinjava = new Jinjava();
+ assertThat(jinjava.render("{{ greeting }}\n", ImmutableMap.of("greeting", "hello")))
+ .isEqualTo("hello");
+ }
+}
diff --git a/src/test/java/com/hubspot/jinjava/el/ext/AstFilterChainTest.java b/src/test/java/com/hubspot/jinjava/el/ext/AstFilterChainTest.java
index c71050414..0544d1a5c 100644
--- a/src/test/java/com/hubspot/jinjava/el/ext/AstFilterChainTest.java
+++ b/src/test/java/com/hubspot/jinjava/el/ext/AstFilterChainTest.java
@@ -27,7 +27,11 @@ public class AstFilterChainTest {
public void setup() {
jinjava =
new Jinjava(
- BaseJinjavaTest.newConfigBuilder().withEnableFilterChainOptimization(true).build()
+ BaseJinjavaTest
+ .newConfigBuilder()
+ .withEnableFilterChainOptimization(true)
+ .withKeepTrailingNewline(true)
+ .build()
);
context = new HashMap<>();
@@ -123,6 +127,7 @@ public void itSkipsDisabledFilterAndContinuesChain() {
BaseJinjavaTest
.newConfigBuilder()
.withEnableFilterChainOptimization(true)
+ .withKeepTrailingNewline(true)
.withDisabled(disabled)
.build()
);
diff --git a/src/test/java/com/hubspot/jinjava/interpret/LegacyOperatorPrecedenceTest.java b/src/test/java/com/hubspot/jinjava/interpret/LegacyOperatorPrecedenceTest.java
index c9849a9a0..20c4729e4 100644
--- a/src/test/java/com/hubspot/jinjava/interpret/LegacyOperatorPrecedenceTest.java
+++ b/src/test/java/com/hubspot/jinjava/interpret/LegacyOperatorPrecedenceTest.java
@@ -21,6 +21,7 @@ public void setUp() throws Exception {
new Jinjava(
BaseJinjavaTest
.newConfigBuilder()
+ .withKeepTrailingNewline(true)
.withLegacyOverrides(LegacyOverrides.NONE)
.build()
);
@@ -28,6 +29,7 @@ public void setUp() throws Exception {
new Jinjava(
BaseJinjavaTest
.newConfigBuilder()
+ .withKeepTrailingNewline(true)
.withLegacyOverrides(
LegacyOverrides.newBuilder().withUseNaturalOperatorPrecedence(true).build()
)
diff --git a/src/test/java/com/hubspot/jinjava/interpret/LegacyWhitespaceControlParsingTest.java b/src/test/java/com/hubspot/jinjava/interpret/LegacyWhitespaceControlParsingTest.java
index 09289d4b5..fffd95ddd 100644
--- a/src/test/java/com/hubspot/jinjava/interpret/LegacyWhitespaceControlParsingTest.java
+++ b/src/test/java/com/hubspot/jinjava/interpret/LegacyWhitespaceControlParsingTest.java
@@ -21,6 +21,7 @@ public void setUp() throws Exception {
new Jinjava(
BaseJinjavaTest
.newConfigBuilder()
+ .withKeepTrailingNewline(true)
.withLegacyOverrides(LegacyOverrides.NONE)
.build()
);
@@ -28,6 +29,7 @@ public void setUp() throws Exception {
new Jinjava(
BaseJinjavaTest
.newConfigBuilder()
+ .withKeepTrailingNewline(true)
.withLegacyOverrides(
LegacyOverrides.newBuilder().withParseWhitespaceControlStrictly(true).build()
)
diff --git a/src/test/java/com/hubspot/jinjava/lib/filter/SliceFilterTest.java b/src/test/java/com/hubspot/jinjava/lib/filter/SliceFilterTest.java
index 571a7b0bd..62e0ea820 100644
--- a/src/test/java/com/hubspot/jinjava/lib/filter/SliceFilterTest.java
+++ b/src/test/java/com/hubspot/jinjava/lib/filter/SliceFilterTest.java
@@ -7,6 +7,7 @@
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import com.hubspot.jinjava.BaseJinjavaTest;
+import com.hubspot.jinjava.Jinjava;
import com.hubspot.jinjava.interpret.RenderResult;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
@@ -14,10 +15,19 @@
import java.util.Random;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
+import org.junit.Before;
import org.junit.Test;
public class SliceFilterTest extends BaseJinjavaTest {
+ @Before
+ public void setup() {
+ jinjava = // reassigns the jinjava field (declared outside this hunk) before each test
+ new Jinjava(
+ BaseJinjavaTest.newConfigBuilder().withKeepTrailingNewline(true).build()
+ );
+ }
+
@Test
public void itSlicesLists() throws Exception {
Document dom = Jsoup.parseBodyFragment(
diff --git a/src/test/java/com/hubspot/jinjava/lib/tag/ForTagTest.java b/src/test/java/com/hubspot/jinjava/lib/tag/ForTagTest.java
index 5333b5f7c..1895a25a4 100644
--- a/src/test/java/com/hubspot/jinjava/lib/tag/ForTagTest.java
+++ b/src/test/java/com/hubspot/jinjava/lib/tag/ForTagTest.java
@@ -11,6 +11,8 @@
import com.google.common.io.Resources;
import com.hubspot.jinjava.BaseInterpretingTest;
import com.hubspot.jinjava.BaseJinjavaTest;
+import com.hubspot.jinjava.Jinjava;
+import com.hubspot.jinjava.JinjavaConfig;
import com.hubspot.jinjava.LegacyOverrides;
import com.hubspot.jinjava.interpret.InterpretException;
import com.hubspot.jinjava.interpret.JinjavaInterpreter;
@@ -40,6 +42,11 @@ public class ForTagTest extends BaseInterpretingTest {
@Override
public void baseSetup() {
super.baseSetup();
+
+ jinjava =
+ new Jinjava(
+ BaseJinjavaTest.newConfigBuilder().withKeepTrailingNewline(true).build()
+ );
tag = new ForTag();
try {
diff --git a/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java b/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java
new file mode 100644
index 000000000..0515e4e10
--- /dev/null
+++ b/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java
@@ -0,0 +1,234 @@
+package com.hubspot.jinjava.tree.parse;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.ImmutableMap;
+import com.hubspot.jinjava.Jinjava;
+import com.hubspot.jinjava.JinjavaConfig;
+import com.hubspot.jinjava.LegacyOverrides;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import org.junit.Test;
+
+/**
+ * Tests for backslash handling inside block/variable/comment delimiters,
+ * covering both the char-based (DefaultTokenScannerSymbols) and string-based
+ * (StringTokenScannerSymbols) scanning paths, with the
+ * {@link LegacyOverrides#isHandleBackslashInQuotesOnly()} flag both off (legacy)
+ * and on (Jinja2-compatible).
+ */
+public class BackslashHandlingTest {
+
+ // ── Jinjava instances ──────────────────────────────────────────────────────
+
+ /** Default (char-based) symbols with every legacy override left at its default (flag = false). */
+ private static Jinjava charLegacy() {
+ LegacyOverrides overrides = LegacyOverrides.newBuilder().build();
+ JinjavaConfig config = JinjavaConfig
+ .newBuilder()
+ .withLegacyOverrides(overrides)
+ .build();
+ return new Jinjava(config);
+ }
+
+ /** Default (char-based) symbols with {@code handleBackslashInQuotesOnly} on (flag = true). */
+ private static Jinjava charNew() {
+ // Build the overrides first so the config chain below stays flat.
+ LegacyOverrides quotesOnly =
+ LegacyOverrides.newBuilder().withHandleBackslashInQuotesOnly(true).build();
+ JinjavaConfig config = JinjavaConfig
+ .newBuilder()
+ .withLegacyOverrides(quotesOnly)
+ .build();
+ return new Jinjava(config);
+ }
+
+ /** String-based symbols (builder defaults) with legacy overrides at their defaults (flag = false). */
+ private static Jinjava stringLegacy() {
+ LegacyOverrides overrides = LegacyOverrides.newBuilder().build();
+ JinjavaConfig config = JinjavaConfig
+ .newBuilder()
+ .withTokenScannerSymbols(StringTokenScannerSymbols.builder().build())
+ .withLegacyOverrides(overrides)
+ .build();
+ return new Jinjava(config);
+ }
+
+ /** String-based symbols (builder defaults) with {@code handleBackslashInQuotesOnly} on (flag = true). */
+ private static Jinjava stringNew() {
+ // Build the overrides first so the config chain below stays flat.
+ LegacyOverrides quotesOnly =
+ LegacyOverrides.newBuilder().withHandleBackslashInQuotesOnly(true).build();
+ JinjavaConfig config = JinjavaConfig
+ .newBuilder()
+ .withTokenScannerSymbols(StringTokenScannerSymbols.builder().build())
+ .withLegacyOverrides(quotesOnly)
+ .build();
+ return new Jinjava(config);
+ }
+
+ // ── Backslash inside a quoted string ──────────────────────────────────────
+ //
+ // Both legacy and new behaviour must handle escaped quotes inside strings
+ // correctly — \" should not close the string.
+
+ @Test
+ public void charLegacy_escapedQuoteInsideString() {
+ String rendered = charLegacy().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>());
+ assertThat(rendered).isEqualTo("he said \"hi\"");
+ }
+
+ @Test
+ public void charNew_escapedQuoteInsideString() {
+ String rendered = charNew().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>());
+ assertThat(rendered).isEqualTo("he said \"hi\"");
+ }
+
+ @Test
+ public void stringLegacy_escapedQuoteInsideString() {
+ String rendered = stringLegacy().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>());
+ assertThat(rendered).isEqualTo("he said \"hi\"");
+ }
+
+ @Test
+ public void stringNew_escapedQuoteInsideString() {
+ String rendered = stringNew().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>());
+ assertThat(rendered).isEqualTo("he said \"hi\"");
+ }
+
+ // ── Backslash outside a quoted string ─────────────────────────────────────
+ //
+ // Template under test: "prefix {{ x \}} suffix }}"
+ //
+ // We test the scanner token structure directly rather than going through
+ // render(), because the expression "x \..." is always a JUEL lexical error
+ // regardless of mode. What differs between modes is which token boundaries
+ // the scanner produces — and that is what we assert on.
+ //
+ // Legacy (backslashInQuotesOnly = false):
+ // Scanner consumes '\' and skips the following '}'. The first '}}' is not
+ // recognized as a closer. The block runs until the second '}}', so the
+ // token sequence is:
+ // TEXT "prefix " | EXPR "{{ x \}} suffix }}"
+ //
+ // New (backslashInQuotesOnly = true):
+ // Scanner leaves '\' untouched. The first '}}' is recognized as the closer.
+ // The token sequence is:
+ // TEXT "prefix " | EXPR "{{ x \}}" | TEXT " suffix }}"
+
+ private static final String BACKSLASH_TEMPLATE = "prefix {{ x \\}} suffix }}";
+
+ @Test
+ public void charLegacy_backslashConsumesOneDelimiterChar_blockRunsToSecondCloser() {
+ List
+ *
+ */
+ private Token handleLineComment() {
+ boolean startOfLine = isStartOfLine(currPost);
+ int afterPrefix = currPost + lineCommentPrefix.length;
+ boolean hasTrimModifier =
+ afterPrefix < length && is[afterPrefix] == symbols.getTrimChar();
+
+ int flushUpTo;
+ if (!startOfLine) {
+ // Mid-line comment: flush up to the prefix, dropping spaces/tabs right before it
+ // but never reaching back past tokenStart (mirrors Python, e.g.
+ // "hello %# comment" → "hello"). The trim modifier is ignored on this branch.
+ int p = currPost - 1;
+ while (p >= tokenStart && (is[p] == ' ' || is[p] == '\t')) {
+ p--;
+ }
+ flushUpTo = p + 1;
+ } else if (hasTrimModifier) {
+ // Start-of-line %#-: strip preceding blank lines and the real-content \n.
+ flushUpTo = lineIndentStartSkippingBlanks(currPost);
+ } else {
+ // Start-of-line %#: strip only the current line's indentation.
+ flushUpTo = lineIndentStart(currPost);
+ }
+
+ Token pending = flushTextBefore(flushUpTo);
+
+ // Scan forward to the next '\n' (or end of input); that span is the comment body.
+ int end = afterPrefix;
+ while (end < length && is[end] != '\n') {
+ end++;
+ }
+
+ // Resume at the '\n' itself (not past it): both %# and %#- leave it in the output.
+ tokenStart = end;
+ currPost = end;
+
+ return (pending != null) ? pending : DELIMITER_MATCHED;
+ }
+
+ /**
+ * Finds where the run of spaces/tabs immediately preceding {@code pos} begins.
+ * When {@code pos} sits on a line-prefix match this is the first column of that
+ * line's indentation, so the indentation is left out of the text flushed before
+ * the prefix. Returns {@code pos} itself if no space or tab precedes it.
+ */
+ private int lineIndentStart(int pos) {
+ int start = pos;
+ // Step left for as long as the character just before 'start' is a space or tab.
+ while (start > 0 && (is[start - 1] == ' ' || is[start - 1] == '\t')) {
+ start--;
+ }
+ return start;
+ }
+
+ /**
+ * Returns the flush boundary for a {@code %#-} line comment.
+ *
+ *
+ * "A\n\n%#-" → flush "A|" → output "A" + comment's \n
+ * "%% set\n%#-" → flush nothing → output comment's \n (tokenStart guard)
+ *
+ */
+ private int lineIndentStartSkippingBlanks(int pos) {
+ int p = pos - 1;
+ while (p >= tokenStart) {
+ // Skip trailing horizontal whitespace on this line (going backwards).
+ while (p >= tokenStart && (is[p] == ' ' || is[p] == '\t')) {
+ p--;
+ }
+ if (p < tokenStart) {
+ break;
+ }
+ if (is[p] == '\n') {
+ // Blank line — consume this \n and keep scanning backwards.
+ p--;
+ } else {
+ // Real content at position p. Return p+1 so flushTextBefore(p+1) keeps
+ // everything through p and drops what follows it: any trailing spaces/tabs
+ // and the \n that ends this line.
+ return p + 1;
+ }
+ }
+ // Reached tokenStart without finding real content — all blank lines were
+ // preceded by a line statement or plain comment. Preserve them.
+ return tokenStart;
+ }
+
+ // ── One-slot stash for the synthetic tag after a line-statement ─────────
+ // When a line-statement prefix is found and there is pending text to flush
+ // first, we return the text token immediately and stash the synthetic tag
+ // here so computeNext() picks it up on the very next call.
+ private Token pendingToken = null;
+
+ @Override
+ protected Token computeNext() {
+ // A stashed token always goes out before any further scanning happens,
+ // and the stash slot is emptied as it is handed over.
+ final Token stashed = pendingToken;
+ if (stashed != null) {
+ pendingToken = null;
+ return stashed;
+ }
+
+ // Nothing stashed: scan for the next token. A null result from getNextToken()
+ // is turned into the iterator's end-of-data signal.
+ final Token scanned = getNextToken();
+ return scanned != null ? scanned : endOfData();
+ }
+
+ // ── Helpers ───────────────────────────────────────────────────────────────
+
+ /**
+ * Reports whether only spaces and tabs (possibly none) separate {@code pos} from
+ * the preceding newline or from the start of the input.
+ * A position of 0 therefore always counts as start-of-line.
+ */
+ private boolean isStartOfLine(int pos) {
+ int idx = pos;
+ while (idx > 0) {
+ char before = is[idx - 1];
+ if (before != ' ' && before != '\t') {
+ return before == '\n';
+ }
+ idx--;
+ }
+ // Ran off the front of the input without meeting any other character.
+ return true;
+ }
+
+ /**
+ * Emits the pending plain text {@code is[tokenStart..upTo)} as a token of the fixed
+ * (text) kind, or returns {@code null} when that range is empty.
+ * {@code tokenStart} is not assigned here; the caller advances it afterwards.
+ */
+ private Token flushTextBefore(int upTo) {
+ if (upTo <= tokenStart) {
+ // Nothing un-emitted lies before upTo.
+ return null;
+ }
+ lastStart = tokenStart;
+ tokenLength = upTo - tokenStart;
+ return emitToken(symbols.getFixed());
+ }
+
+ /** Maps an open block kind to its closing delimiter; {@code null} for any other kind. */
+ private char[] closingDelimFor(int currentKind) {
+ char[] closer = null;
+ if (currentKind == symbols.getExprStart()) {
+ closer = varEnd;
+ } else if (currentKind == symbols.getTag()) {
+ closer = blkEnd;
+ } else if (currentKind == symbols.getNote()) {
+ closer = cmtEnd;
+ }
+ // closer is still null when the kind is none of expression, tag, or comment.
+ return closer;
+ }
+
+ /**
+ * Constructs a token from {@code lastStart}/{@code tokenLength}, then applies
+ * trimBlocks and raw-mode post-processing identical to the char-based path.
+ */
+ private Token emitToken(int kind) {
+ Token t = Token.newToken(
+ kind,
+ symbols,
+ whitespaceControlParser,
+ String.valueOf(is, lastStart, tokenLength), // token image: is[lastStart, lastStart + tokenLength)
+ currLine,
+ lastStart - lastNewlinePos + 1 // presumably the 1-based start column — TODO confirm
+ );
+
+ if (
+ (t instanceof TagToken || t instanceof NoteToken) &&
+ config.isTrimBlocks() &&
+ currPost < length &&
+ is[currPost] == '\n'
+ ) { // trimBlocks: swallow the single '\n' directly after a tag or comment token
+ lastNewlinePos = currPost + 1;
+ ++currPost;
+ ++tokenStart;
+ }
+
+ if (t instanceof TagToken) {
+ TagToken tt = (TagToken) t;
+ if ("raw".equals(tt.getTagName())) {
+ inRaw = 1; // raw section opens; the raw tag itself is returned unchanged
+ return tt;
+ } else if ("endraw".equals(tt.getTagName())) {
+ inRaw = 0; // raw section closes; the endraw tag itself is returned unchanged
+ return tt;
+ }
+ }
+
+ if (inRaw > 0 && t.getType() != symbols.getFixed()) { // inside raw: re-emit non-text tokens as text
+ return Token.newToken(
+ symbols.getFixed(),
+ symbols,
+ whitespaceControlParser,
+ t.image,
+ currLine,
+ lastStart - lastNewlinePos + 1
+ );
+ }
+
+ return t;
+ }
+
+ /**
+ * Builds the final token from whatever lies between {@code tokenStart} and the
+ * cursor, and sets {@code tokenStart = currPost} so nothing is left to emit afterwards.
+ */
+ private Token getEndToken() {
+ // Claim everything from tokenStart to the cursor, then mark it consumed.
+ lastStart = tokenStart;
+ tokenLength = currPost - lastStart;
+ tokenStart = currPost;
+
+ final String image = String.valueOf(is, lastStart, tokenLength);
+ final int column = lastStart - lastNewlinePos + 1;
+
+ // An open block (inBlock > 0) with no open comment is reported as unclosed.
+ if (inComment <= 0 && inBlock > 0) {
+ return new UnclosedToken(image, currLine, column, symbols, whitespaceControlParser);
+ }
+
+ // Otherwise the remainder is a note if a comment is open, else fixed text.
+ final int type = inComment > 0 ? symbols.getNote() : symbols.getFixed();
+ return Token.newToken(
+ type,
+ symbols,
+ whitespaceControlParser,
+ image,
+ currLine,
+ column
+ );
+ }
+
+ /** Tests whether the characters of {@code is} beginning at {@code pos} equal {@code pattern}. */
+ private boolean regionMatches(int pos, char[] pattern) {
+ final int patternLen = pattern.length;
+ if (pos + patternLen > length) {
+ return false;
+ }
+ int matched = 0;
+ while (matched < patternLen && is[pos + matched] == pattern[matched]) {
+ matched++;
+ }
+ return matched == patternLen;
+ }
+}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java
new file mode 100644
index 000000000..242abd241
--- /dev/null
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java
@@ -0,0 +1,269 @@
+/**********************************************************************
+ * Copyright (c) 2014 HubSpot Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **********************************************************************/
+package com.hubspot.jinjava.tree.parse;
+
+/**
+ * A {@link TokenScannerSymbols} implementation that supports arbitrary multi-character
+ * delimiter strings, addressing issue #195.
+ *
+ * <p>Example:
+ *
+ * <pre>{@code
+ * JinjavaConfig config = JinjavaConfig.newBuilder()
+ *   .withTokenScannerSymbols(StringTokenScannerSymbols.builder()
+ *     .withVariableStartString("\\VAR{")
+ *     .withVariableEndString("}")
+ *     .withBlockStartString("\\BLOCK{")
+ *     .withBlockEndString("}")
+ *     .withCommentStartString("\\#{")
+ *     .withCommentEndString("}")
+ *     .build())
+ *   .build();
+ * }</pre>
+ *
+ * <p>All delimiter fields are {@code final} and are assigned once from the
+ * {@link Builder}; use {@link #builder()} to create an instance.
+ */
+public class StringTokenScannerSymbols extends TokenScannerSymbols {
+
+  private static final long serialVersionUID = 1L;
+
+  // ── Internal sentinel chars ────────────────────────────────────────────────
+  // Unicode Private Use Area values. They are not expected to appear in real
+  // template text, so Token.newToken()'s if-chain dispatches to the right Token
+  // subclass. NOTE(review): nothing here prevents a template from containing these
+  // code points; confirm the scanner never compares them against template input.
+  static final char SENTINEL_FIXED = '\uE000';
+  static final char SENTINEL_NOTE = '\uE001';
+  static final char SENTINEL_TAG = '\uE002';
+  static final char SENTINEL_EXPR_START = '\uE003';
+  static final char SENTINEL_EXPR_END = '\uE004';
+  static final char SENTINEL_PREFIX = '\uE005'; // unused for scanning
+  static final char SENTINEL_POSTFIX = '\uE006'; // unused for scanning
+  static final char SENTINEL_NEWLINE = '\n'; // real newline for line tracking
+  static final char SENTINEL_TRIM = '-'; // real trim char
+
+  // ── The configured string delimiters ──────────────────────────────────────
+  private final String variableStartString;
+  private final String variableEndString;
+  private final String blockStartString;
+  private final String blockEndString;
+  private final String commentStartString;
+  private final String commentEndString;
+  // Optional; null means disabled. Never empty (enforced by the Builder).
+  private final String lineStatementPrefix;
+  private final String lineCommentPrefix;
+
+  /** Copies the (already validated) delimiter strings out of the builder. */
+  private StringTokenScannerSymbols(Builder builder) {
+    this.variableStartString = builder.variableStartString;
+    this.variableEndString = builder.variableEndString;
+    this.blockStartString = builder.blockStartString;
+    this.blockEndString = builder.blockEndString;
+    this.commentStartString = builder.commentStartString;
+    this.commentEndString = builder.commentEndString;
+    this.lineStatementPrefix = builder.lineStatementPrefix;
+    this.lineCommentPrefix = builder.lineCommentPrefix;
+  }
+
+  // ── Abstract char contract — returns sentinels only ───────────────────────
+
+  @Override
+  public char getPrefixChar() {
+    return SENTINEL_PREFIX;
+  }
+
+  @Override
+  public char getPostfixChar() {
+    return SENTINEL_POSTFIX;
+  }
+
+  @Override
+  public char getFixedChar() {
+    return SENTINEL_FIXED;
+  }
+
+  @Override
+  public char getNoteChar() {
+    return SENTINEL_NOTE;
+  }
+
+  @Override
+  public char getTagChar() {
+    return SENTINEL_TAG;
+  }
+
+  @Override
+  public char getExprStartChar() {
+    return SENTINEL_EXPR_START;
+  }
+
+  @Override
+  public char getExprEndChar() {
+    return SENTINEL_EXPR_END;
+  }
+
+  @Override
+  public char getNewlineChar() {
+    return SENTINEL_NEWLINE;
+  }
+
+  @Override
+  public char getTrimChar() {
+    return SENTINEL_TRIM;
+  }
+
+  // ── String-level getters: MUST override the base-class lazy cache ──────────
+  // The base class builds these from the char methods above, which would produce
+  // garbage sentinel strings. We override them to return the real delimiters so
+  // that ExpressionToken, TagToken, and NoteToken strip content correctly.
+
+  /** Returns the configured variable (expression) start delimiter. */
+  @Override
+  public String getExpressionStart() {
+    return variableStartString;
+  }
+
+  /** Returns the configured variable (expression) end delimiter. */
+  @Override
+  public String getExpressionEnd() {
+    return variableEndString;
+  }
+
+  /** Returns the configured block (tag) start delimiter. */
+  @Override
+  public String getExpressionStartWithTag() {
+    return blockStartString;
+  }
+
+  /** Returns the configured block (tag) end delimiter. */
+  @Override
+  public String getExpressionEndWithTag() {
+    return blockEndString;
+  }
+
+  /** Returns the configured comment start delimiter. */
+  @Override
+  public String getOpeningComment() {
+    return commentStartString;
+  }
+
+  /** Returns the configured comment end delimiter. */
+  @Override
+  public String getClosingComment() {
+    return commentEndString;
+  }
+
+  /** Returns the line statement prefix, or {@code null} when the feature is disabled. */
+  @Override
+  public String getLineStatementPrefix() {
+    return lineStatementPrefix;
+  }
+
+  /** Returns the line comment prefix, or {@code null} when the feature is disabled. */
+  @Override
+  public String getLineCommentPrefix() {
+    return lineCommentPrefix;
+  }
+
+  // ── isStringBased flag ────────────────────────────────────────────────────
+
+  /** Always {@code true}: this implementation is configured with delimiter strings. */
+  @Override
+  public boolean isStringBased() {
+    return true;
+  }
+
+  // ── Builder ────────────────────────────────────────────────────────────────
+
+  /** Creates a builder pre-populated with the standard Jinja2 delimiters. */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Mutable builder for {@link StringTokenScannerSymbols}. Every setter validates
+   * its argument eagerly and throws {@link IllegalArgumentException} on bad input.
+   */
+  public static final class Builder {
+
+    // Defaults mirror the standard Jinja2 delimiters, so building with no
+    // overrides behaves identically to DefaultTokenScannerSymbols.
+    private String variableStartString = "{{";
+    private String variableEndString = "}}";
+    private String blockStartString = "{%";
+    private String blockEndString = "%}";
+    private String commentStartString = "{#";
+    private String commentEndString = "#}";
+    private String lineStatementPrefix = null; // disabled by default
+    private String lineCommentPrefix = null; // disabled by default
+
+    /** @throws IllegalArgumentException if {@code s} is {@code null} or empty */
+    public Builder withVariableStartString(String s) {
+      this.variableStartString = requireNonEmpty(s, "variableStartString");
+      return this;
+    }
+
+    /** @throws IllegalArgumentException if {@code s} is {@code null} or empty */
+    public Builder withVariableEndString(String s) {
+      this.variableEndString = requireNonEmpty(s, "variableEndString");
+      return this;
+    }
+
+    /** @throws IllegalArgumentException if {@code s} is {@code null} or empty */
+    public Builder withBlockStartString(String s) {
+      this.blockStartString = requireNonEmpty(s, "blockStartString");
+      return this;
+    }
+
+    /** @throws IllegalArgumentException if {@code s} is {@code null} or empty */
+    public Builder withBlockEndString(String s) {
+      this.blockEndString = requireNonEmpty(s, "blockEndString");
+      return this;
+    }
+
+    /** @throws IllegalArgumentException if {@code s} is {@code null} or empty */
+    public Builder withCommentStartString(String s) {
+      this.commentStartString = requireNonEmpty(s, "commentStartString");
+      return this;
+    }
+
+    /** @throws IllegalArgumentException if {@code s} is {@code null} or empty */
+    public Builder withCommentEndString(String s) {
+      this.commentEndString = requireNonEmpty(s, "commentEndString");
+      return this;
+    }
+
+    /**
+     * Sets the line statement prefix (e.g. {@code "%%"}). A line beginning with
+     * this prefix is treated as a block tag, equivalent to wrapping its content
+     * in the configured block delimiters. Pass {@code null} to disable (default).
+     *
+     * @throws IllegalArgumentException if {@code s} is the empty string, which
+     *     is a prefix of every line and therefore cannot mark line statements
+     */
+    public Builder withLineStatementPrefix(String s) {
+      this.lineStatementPrefix = requireNullOrNonEmpty(s, "lineStatementPrefix");
+      return this;
+    }
+
+    /**
+     * Sets the line comment prefix (e.g. {@code "%#"}). A line beginning with
+     * this prefix is stripped entirely from the output. Pass {@code null} to
+     * disable (default).
+     *
+     * @throws IllegalArgumentException if {@code s} is the empty string, which
+     *     is a prefix of every line and therefore cannot mark line comments
+     */
+    public Builder withLineCommentPrefix(String s) {
+      this.lineCommentPrefix = requireNullOrNonEmpty(s, "lineCommentPrefix");
+      return this;
+    }
+
+    /** Builds an instance from the current builder state. */
+    public StringTokenScannerSymbols build() {
+      return new StringTokenScannerSymbols(this);
+    }
+
+    /** Validates a mandatory delimiter: it must be neither {@code null} nor empty. */
+    private static String requireNonEmpty(String value, String name) {
+      if (value == null || value.isEmpty()) {
+        throw new IllegalArgumentException(name + " must not be null or empty");
+      }
+      return value;
+    }
+
+    /** Validates an optional prefix: {@code null} disables it, but empty is rejected. */
+    private static String requireNullOrNonEmpty(String value, String name) {
+      if (value != null && value.isEmpty()) {
+        throw new IllegalArgumentException(
+          name + " must be null (disabled) or a non-empty string"
+        );
+      }
+      return value;
+    }
+  }
+}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
index a737dd96c..0c500c145 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
@@ -54,7 +54,10 @@ public int getType() {
*/
@Override
protected void parse() {
- if (image.length() < 4) {
+ int startLen = getSymbols().getTagStartLength();
+ int endLen = getSymbols().getTagEndLength();
+
+ if (image.length() < startLen + endLen) {
throw new TemplateSyntaxException(
image,
"Malformed tag token",
@@ -63,7 +66,7 @@ protected void parse() {
);
}
- content = image.substring(2, image.length() - 2);
+ content = image.substring(startLen, image.length() - endLen);
content = handleTrim(content);
int nameStart = -1, pos = 0, len = content.length();
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
index 7e53b295a..de3b6e040 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
@@ -21,6 +21,13 @@
import com.hubspot.jinjava.JinjavaConfig;
import com.hubspot.jinjava.features.BuiltInFeatures;
+/**
+ * Character-based token scanner for the standard single-character-prefix delimiter
+ * scheme (e.g. <code>{{</code>, <code>{%</code>, <code>{#</code>).
+ *
+ *