diff --git a/src/main/java/com/hubspot/jinjava/LegacyOverrides.java b/src/main/java/com/hubspot/jinjava/LegacyOverrides.java
index bd3732455..ba9278127 100644
--- a/src/main/java/com/hubspot/jinjava/LegacyOverrides.java
+++ b/src/main/java/com/hubspot/jinjava/LegacyOverrides.java
@@ -21,6 +21,7 @@ public interface LegacyOverrides extends WithLegacyOverrides {
.withAllowAdjacentTextNodes(true)
.withUseTrimmingForNotesAndExpressions(true)
.withKeepNullableLoopValues(true)
+ .withHandleBackslashInQuotesOnly(true)
.build();
LegacyOverrides ALL = new Builder()
.withEvaluateMapKeys(true)
@@ -32,6 +33,7 @@ public interface LegacyOverrides extends WithLegacyOverrides {
.withAllowAdjacentTextNodes(true)
.withUseTrimmingForNotesAndExpressions(true)
.withKeepNullableLoopValues(true)
+ .withHandleBackslashInQuotesOnly(true)
.build();
@Value.Default
@@ -79,6 +81,23 @@ default boolean isKeepNullableLoopValues() {
return false;
}
+ /**
+ * When {@code true}, the token scanner treats backslash as an escape character
+ * only inside quoted string literals, leaving bare backslashes outside quotes
+ * untouched for the expression parser (JUEL) to handle. This matches the
+ * behaviour of Python's Jinja2, where the template scanner is not responsible
+ * for backslash interpretation at all.
+ *
+ * <p>When {@code false} (the default), the scanner consumes a backslash and
+ * the following character unconditionally, regardless of quote context. This
+ * is the legacy Jinjava behaviour, which prevents closing delimiters from
+ * being recognized after a backslash but diverges from Jinja2.
+ *
+ * @return {@code true} when backslash escaping is limited to quoted strings;
+ * {@code false} unless overridden
+ */
+ @Value.Default
+ default boolean isHandleBackslashInQuotesOnly() {
+ return false;
+ }
+
class Builder extends ImmutableLegacyOverrides.Builder {}
static Builder newBuilder() {
diff --git a/src/main/java/com/hubspot/jinjava/tree/TreeParser.java b/src/main/java/com/hubspot/jinjava/tree/TreeParser.java
index 56f11003c..fedadbaf9 100644
--- a/src/main/java/com/hubspot/jinjava/tree/TreeParser.java
+++ b/src/main/java/com/hubspot/jinjava/tree/TreeParser.java
@@ -32,6 +32,7 @@
import com.hubspot.jinjava.lib.tag.FlexibleTag;
import com.hubspot.jinjava.lib.tag.Tag;
import com.hubspot.jinjava.tree.parse.ExpressionToken;
+import com.hubspot.jinjava.tree.parse.StringTokenScanner;
import com.hubspot.jinjava.tree.parse.TagToken;
import com.hubspot.jinjava.tree.parse.TextToken;
import com.hubspot.jinjava.tree.parse.Token;
@@ -39,6 +40,7 @@
import com.hubspot.jinjava.tree.parse.TokenScannerSymbols;
import com.hubspot.jinjava.tree.parse.UnclosedToken;
import com.hubspot.jinjava.tree.parse.WhitespaceControlParser;
+import java.util.Iterator;
import org.apache.commons.lang3.StringUtils;
public class TreeParser {
@@ -52,7 +54,7 @@ public class TreeParser {
public TreeParser(JinjavaInterpreter interpreter, String input) {
this.scanner =
- Iterators.peekingIterator(new TokenScanner(input, interpreter.getConfig()));
+ Iterators.peekingIterator(createScanner(input, interpreter.getConfig()));
this.interpreter = interpreter;
this.symbols = interpreter.getConfig().getTokenScannerSymbols();
this.whitespaceControlParser =
@@ -104,6 +106,13 @@ public Node buildTree() {
return root;
}
+  /**
+   * Chooses the token scanner implementation for the configured symbols:
+   * {@link StringTokenScanner} when the symbols report
+   * {@link TokenScannerSymbols#isStringBased()}, otherwise the character-based
+   * {@link TokenScanner}.
+   *
+   * @param input the template source to tokenize
+   * @param config configuration supplying the token scanner symbols
+   * @return an iterator over the tokens of {@code input}
+   */
+  private static Iterator<Token> createScanner(String input, JinjavaConfig config) {
+    // Parameterized return type: a raw Iterator would push an unchecked conversion
+    // onto the peekingIterator(...) call that consumes this result.
+    if (config.getTokenScannerSymbols().isStringBased()) {
+      return new StringTokenScanner(input, config);
+    }
+    return new TokenScanner(input, config);
+  }
+
/**
* @return null if EOF or error
*/
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java
index d8d9996d5..1c0d679c0 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java
@@ -54,7 +54,14 @@ public int getType() {
@Override
protected void parse() {
- this.expr = WhitespaceUtils.unwrap(image, "{{", "}}");
+ // Use the symbols-derived delimiter strings instead of the hardcoded "{{" / "}}"
+ // so that custom delimiters (e.g. "\VAR{" / "}") are stripped correctly.
+ // The unwrapped text is then passed through handleTrim() and trimToEmpty() below.
+ this.expr =
+ WhitespaceUtils.unwrap(
+ image,
+ getSymbols().getExpressionStart(),
+ getSymbols().getExpressionEnd()
+ );
this.expr = handleTrim(expr);
this.expr = StringUtils.trimToEmpty(this.expr);
}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java
index 3f5360e67..450f9ccbd 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java
@@ -48,8 +48,11 @@ public int getType() {
*/
@Override
protected void parse() {
- if (image.length() > 4) { // {# #}
- handleTrim(image.substring(2, image.length() - 2));
+ // Delimiter lengths come from the configured symbols, so comment delimiters of
+ // any length (not only the two-character "{#" / "#}") are stripped.
+ int startLen = getSymbols().getCommentStartLength();
+ int endLen = getSymbols().getCommentEndLength();
+
+ // The value returned by handleTrim() is not used here; content is always reset
+ // to the empty string below.
+ if (image.length() > startLen + endLen) {
+ handleTrim(image.substring(startLen, image.length() - endLen));
}
content = "";
}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScanner.java b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScanner.java
new file mode 100644
index 000000000..0e5df631c
--- /dev/null
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScanner.java
@@ -0,0 +1,685 @@
+/*
+ Copyright (c) 2014 HubSpot Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package com.hubspot.jinjava.tree.parse;
+
+import static com.hubspot.jinjava.util.CharArrayUtils.charArrayRegionMatches;
+
+import com.google.common.collect.AbstractIterator;
+import com.hubspot.jinjava.JinjavaConfig;
+
+/**
+ * String-matching token scanner for {@link TokenScannerSymbols} implementations
+ * where {@link TokenScannerSymbols#isStringBased()} returns {@code true} — most
+ * notably {@link StringTokenScannerSymbols}.
+ *
+ * Unlike the character-based {@link TokenScanner}, this scanner matches
+ * multi-character delimiter strings directly (e.g. {@code \VAR{} / {@code }},
+ * {@code \BLOCK{} / {@code }}) without relying on a shared prefix character. It also
+ * supports optional {@link TokenScannerSymbols#getLineStatementPrefix() line statement}
+ * and {@link TokenScannerSymbols#getLineCommentPrefix() line comment} prefixes,
+ * matching Python Jinja2 semantics.
+ *
+ * <p>{@link TreeParser} selects this scanner automatically when
+ * {@code symbols.isStringBased()} is {@code true}; callers never instantiate it
+ * directly.
+ */
+// The element type must be spelled out: with a raw AbstractIterator, endOfData()
+// is typed as Object and cannot be returned from computeNext(), which returns Token.
+public class StringTokenScanner extends AbstractIterator<Token> {
+
+  // Configuration consulted for trimBlocks / lstripBlocks and legacy overrides.
+  private final JinjavaConfig config;
+
+  // The template source as a char array, and its length.
+  private final char[] is;
+  private final int length;
+
+  // Current scan position within `is`.
+  private int currPost = 0;
+  // Start of the next text region that has not been emitted yet.
+  private int tokenStart = 0;
+  // Length and start of the slice handed to Token.newToken() for the next token.
+  private int tokenLength = 0;
+  // Kind of the block currently open; set back to the fixed (text) kind on close.
+  private int tokenKind = -1;
+  private int lastStart = 0;
+  // Mode flags, used as 0/1: inside a comment, inside raw mode, inside a
+  // variable/tag block.
+  private int inComment = 0;
+  private int inRaw = 0;
+  private int inBlock = 0;
+  // The quote character of the string literal being scanned, or 0 when not in one.
+  private char inQuote = 0;
+  // Line counter (starts at 1) and the index just past the most recent newline;
+  // the latter is used to derive token column positions.
+  private int currLine = 1;
+  private int lastNewlinePos = 0;
+  private final TokenScannerSymbols symbols;
+  private final WhitespaceControlParser whitespaceControlParser;
+
+  // Delimiter strings from the symbols, cached as char arrays for matching.
+  private final char[] varStart;
+  private final char[] varEnd;
+  private final char[] blkStart;
+  private final char[] blkEnd;
+  private final char[] cmtStart;
+  private final char[] cmtEnd;
+
+  // Optional line-oriented prefixes; null when not configured.
+  private final char[] lineStmtPrefix;
+  private final char[] lineCommentPrefix;
+
+  // When true, backslash is treated as an escape character only inside quoted
+  // string literals, matching Jinja2 behaviour. When false (legacy default),
+  // the scanner consumes backslash + next char unconditionally.
+  private final boolean backslashInQuotesOnly;
+
+  // Remembers where the current opening delimiter began so the emitted block/comment
+  // token image starts from the opener (not the content), letting parse() strip the
+  // correct number of delimiter characters from both ends.
+  private int blockOpenerStart = 0;
+
+  /**
+   * Creates a scanner over {@code input}, caching the delimiter strings, the
+   * optional line prefixes, the whitespace-control mode and the backslash-handling
+   * flag read from {@code config}.
+   *
+   * @param input template source to tokenize
+   * @param config configuration providing the scanner symbols and legacy overrides
+   */
+  public StringTokenScanner(String input, JinjavaConfig config) {
+    this.config = config;
+
+    this.is = input.toCharArray();
+    this.length = this.is.length;
+
+    this.symbols = config.getTokenScannerSymbols();
+    if (config.getLegacyOverrides().isParseWhitespaceControlStrictly()) {
+      this.whitespaceControlParser = WhitespaceControlParser.STRICT;
+    } else {
+      this.whitespaceControlParser = WhitespaceControlParser.LENIENT;
+    }
+
+    // Block-style delimiters, cached as char arrays for region matching.
+    this.varStart = symbols.getExpressionStart().toCharArray();
+    this.varEnd = symbols.getExpressionEnd().toCharArray();
+    this.blkStart = symbols.getExpressionStartWithTag().toCharArray();
+    this.blkEnd = symbols.getExpressionEndWithTag().toCharArray();
+    this.cmtStart = symbols.getOpeningComment().toCharArray();
+    this.cmtEnd = symbols.getClosingComment().toCharArray();
+
+    // A null or empty prefix disables the corresponding line-oriented syntax.
+    String statementPrefix = symbols.getLineStatementPrefix();
+    if (statementPrefix == null || statementPrefix.isEmpty()) {
+      this.lineStmtPrefix = null;
+    } else {
+      this.lineStmtPrefix = statementPrefix.toCharArray();
+    }
+
+    String commentPrefix = symbols.getLineCommentPrefix();
+    if (commentPrefix == null || commentPrefix.isEmpty()) {
+      this.lineCommentPrefix = null;
+    } else {
+      this.lineCommentPrefix = commentPrefix.toCharArray();
+    }
+
+    this.backslashInQuotesOnly =
+      config.getLegacyOverrides().isHandleBackslashInQuotesOnly();
+  }
+
+ // ── Core scanning loop ────────────────────────────────────────────────────
+ //
+ // tokenStart — start of the next text region to buffer.
+ // blockOpenerStart — position of the current opening delimiter; the emitted
+ // block/comment token image begins here.
+ // lastStart / tokenLength — the slice passed to Token.newToken().
+ //
+ // Two-phase emission:
+ // 1. Opener detected → flush buffered plain text as TEXT, record
+ // blockOpenerStart, advance tokenStart/currPost past the opener into
+ // the block content, set inBlock/inComment.
+ // 2. Closer detected → emit is[blockOpenerStart .. closerEnd) as the
+ // appropriate token type; advance tokenStart = currPost = closerEnd.
+
+ // Sentinel returned by scan helpers to mean "a delimiter was matched and
+ // scanner state was updated — loop again without advancing currPost".
+ // Any non-null return from a helper that is NOT this sentinel is a real token.
+ // The sentinel is recognised by reference identity (==) in getNextToken(), which
+ // loops again instead of returning it; its own content is never inspected there.
+ private static final Token DELIMITER_MATCHED = new TextToken(
+ "",
+ 0,
+ 0,
+ new DefaultTokenScannerSymbols()
+ );
+
+ /**
+ * Scans forward from {@code currPost} until a token is ready to emit.
+ *
+ * Each iteration first records a newline at the current position (line counter
+ * and column base), then dispatches on the scanner mode in this order: inside a
+ * comment, inside a variable/tag block, plain text, raw mode. The helpers return
+ * a real token to emit, {@link #DELIMITER_MATCHED} to loop again without
+ * advancing, or {@code null}.
+ *
+ * @return the next token, or {@code null} when the input is exhausted and no
+ * un-emitted characters remain
+ */
+ private Token getNextToken() {
+ while (currPost < length) {
+ char c = is[currPost];
+
+ // Line/column bookkeeping happens here, before any mode-specific scanning.
+ if (c == '\n') {
+ currLine++;
+ lastNewlinePos = currPost + 1;
+ }
+
+ if (inComment > 0) {
+ Token t = scanInsideComment();
+ if (t != null) {
+ return t;
+ }
+ continue; // scanInsideComment advanced currPost
+ }
+
+ if (inBlock > 0) {
+ Token t = scanInsideBlock(c);
+ if (t == DELIMITER_MATCHED) {
+ continue; // closer not yet found, currPost already advanced
+ }
+ if (t != null) {
+ return t;
+ }
+ // Defensive: scanInsideBlock has no path that returns null.
+ continue;
+ }
+
+ if (inRaw == 0) {
+ Token t = scanPlainText(c);
+ if (t == DELIMITER_MATCHED) {
+ continue; // opener matched, state updated, no pending text
+ }
+ if (t != null) {
+ return t; // pending text flushed, or line-statement token
+ }
+ // null means nothing matched — fall through to advance
+ } else {
+ Token t = scanRawMode();
+ if (t == DELIMITER_MATCHED) {
+ continue;
+ }
+ if (t != null) {
+ return t;
+ }
+ }
+
+ currPost++;
+ }
+
+ // NOTE(review): if the input ends immediately after an opening delimiter,
+ // currPost == tokenStart here and nothing is emitted for the unclosed opener —
+ // confirm this is intended.
+ if (currPost > tokenStart) {
+ return getEndToken();
+ }
+ return null;
+ }
+
+  /**
+   * Takes one step while inside a comment.
+   *
+   * @return the comment token, spanning from the opening delimiter through the
+   *     closing delimiter, once the closer is found at the current position;
+   *     otherwise {@code null} after advancing {@code currPost} by one
+   */
+  private Token scanInsideComment() {
+    if (!regionMatches(currPost, cmtEnd)) {
+      currPost++;
+      return null;
+    }
+
+    int closerEnd = currPost + cmtEnd.length;
+    lastStart = blockOpenerStart;
+    tokenLength = closerEnd - blockOpenerStart;
+    tokenStart = closerEnd;
+    currPost = closerEnd;
+    inComment = 0;
+
+    // Back to plain-text mode before emitting the token of the kind just closed.
+    int closedKind = tokenKind;
+    tokenKind = symbols.getFixed();
+    return emitToken(closedKind);
+  }
+
+  /**
+   * Takes one step while inside a variable or tag block, always advancing
+   * {@code currPost}.
+   *
+   * <p>A backslash skips itself and the following character when inside a quoted
+   * string, or anywhere in the block when {@code backslashInQuotesOnly} is
+   * {@code false}. Quote characters open and close string literals; the closing
+   * delimiter is only looked for outside of them.
+   *
+   * @param c the character at {@code currPost}
+   * @return the finished block token when the closing delimiter is found, otherwise
+   *     {@link #DELIMITER_MATCHED} (meaning "keep looping"); never {@code null}
+   */
+  private Token scanInsideBlock(char c) {
+    boolean quoted = inQuote != 0;
+
+    if (c == '\\' && (quoted || !backslashInQuotesOnly)) {
+      // Skip the backslash plus the escaped character, or just the backslash when
+      // it is the last character of the input.
+      currPost += Math.min(2, length - currPost);
+      return DELIMITER_MATCHED;
+    }
+
+    if (quoted) {
+      if (c == inQuote) {
+        inQuote = 0;
+      }
+      currPost++;
+      return DELIMITER_MATCHED;
+    }
+
+    if (c == '\'' || c == '"') {
+      inQuote = c;
+      currPost++;
+      return DELIMITER_MATCHED;
+    }
+
+    // Look for the closing delimiter that belongs to the open block kind.
+    char[] closer = closingDelimFor(tokenKind);
+    if (closer == null || !regionMatches(currPost, closer)) {
+      currPost++;
+      return DELIMITER_MATCHED;
+    }
+
+    int closerEnd = currPost + closer.length;
+    lastStart = blockOpenerStart;
+    tokenLength = closerEnd - blockOpenerStart;
+    tokenStart = closerEnd;
+    currPost = closerEnd;
+    inBlock = 0;
+
+    int closedKind = tokenKind;
+    tokenKind = symbols.getFixed();
+    return emitToken(closedKind);
+  }
+
+  /**
+   * Checks the current position for an opener while in normal (non-raw) text mode.
+   * Candidates are tried in a fixed order: line statement prefix (only at the start
+   * of a line), line comment prefix (anywhere on a line), variable opener, block
+   * opener, comment opener.
+   *
+   * @param c the character at {@code currPost} (not consulted by this method)
+   * @return a token that is ready to emit, {@link #DELIMITER_MATCHED} when an opener
+   *     was consumed with nothing to emit yet, or {@code null} when nothing matched
+   *     and the caller should advance {@code currPost}
+   */
+  private Token scanPlainText(char c) {
+    Token outcome = null;
+
+    if (
+      lineStmtPrefix != null &&
+      regionMatches(currPost, lineStmtPrefix) &&
+      isStartOfLine(currPost)
+    ) {
+      // Line statement, e.g. "%% if foo".
+      outcome = handleLineStatement();
+    } else if (lineCommentPrefix != null && regionMatches(currPost, lineCommentPrefix)) {
+      // Line comment, e.g. "%# this is ignored".
+      outcome = handleLineComment();
+    } else if (regionMatches(currPost, varStart)) {
+      // Variable opener, e.g. "{{" or "\VAR{".
+      outcome = openBlock(varStart, symbols.getExprStart(), false);
+    } else if (regionMatches(currPost, blkStart)) {
+      // Block opener, e.g. "{%" or "\BLOCK{".
+      outcome = openBlock(blkStart, symbols.getTag(), false);
+    } else if (regionMatches(currPost, cmtStart)) {
+      // Comment opener, e.g. "{#" or "\#{".
+      outcome = openBlock(cmtStart, symbols.getNote(), true);
+    }
+
+    return outcome;
+  }
+
+  /**
+   * While in raw mode, recognises only a block opener that is followed (after
+   * optional whitespace) by {@code endraw}; everything else stays plain text.
+   *
+   * @return the buffered text preceding the opener if there is any,
+   *     {@link #DELIMITER_MATCHED} when the opener was consumed with no buffered
+   *     text, or {@code null} when the current position does not start an
+   *     {@code endraw} tag
+   */
+  private Token scanRawMode() {
+    if (!regionMatches(currPost, blkStart)) {
+      return null;
+    }
+
+    int afterOpener = currPost + blkStart.length;
+    int cursor = afterOpener;
+    for (; cursor < length && Character.isWhitespace(is[cursor]); cursor++) {
+      // skip whitespace between the opener and the tag name
+    }
+    if (!charArrayRegionMatches(is, cursor, "endraw")) {
+      return null;
+    }
+
+    Token buffered = flushTextBefore(currPost);
+    blockOpenerStart = currPost;
+    tokenStart = afterOpener;
+    currPost = afterOpener;
+    tokenKind = symbols.getTag();
+    inBlock = 1;
+
+    if (buffered == null) {
+      return DELIMITER_MATCHED;
+    }
+    return buffered;
+  }
+
+  /**
+   * Enters a variable/tag block or a comment at the current position: flushes any
+   * buffered text, remembers where the opener starts, and moves the scan position
+   * just past the opener.
+   *
+   * @param opener the opening delimiter that matched at {@code currPost}
+   * @param kind the token kind to record for the block being opened
+   * @param isComment {@code true} to enter comment mode, {@code false} for block mode
+   * @return the buffered text token if there was one, otherwise
+   *     {@link #DELIMITER_MATCHED}
+   */
+  private Token openBlock(char[] opener, int kind, boolean isComment) {
+    Token buffered = flushTextBefore(currPost);
+
+    int contentStart = currPost + opener.length;
+    blockOpenerStart = currPost;
+    tokenStart = contentStart;
+    currPost = contentStart;
+    tokenKind = kind;
+
+    if (isComment) {
+      inComment = 1;
+    } else {
+      inBlock = 1;
+    }
+
+    if (buffered == null) {
+      return DELIMITER_MATCHED;
+    }
+    return buffered;
+  }
+
+  /**
+   * Handles a line statement prefix at {@code currPost}: consumes the rest of the
+   * line, builds a synthetic block-tag token from its content, and positions the
+   * scanner after the line's newline.
+   *
+   * <p>The synthetic tag reports the line on which the statement starts. That line
+   * is captured up front because consuming the trailing newline (and, with
+   * lstripBlocks, any following blank lines) advances {@code currLine}.
+   *
+   * @return the buffered text preceding the statement if there is any (the tag is
+   *     then stashed in {@code pendingToken} for the next call), otherwise the
+   *     synthetic tag token itself
+   */
+  private Token handleLineStatement() {
+    // Line of the statement itself; currLine moves on once newlines are consumed.
+    int statementLine = currLine;
+
+    Token pending = flushTextBefore(lineIndentStart(currPost));
+
+    int contentStart = currPost + lineStmtPrefix.length;
+    while (contentStart < length && is[contentStart] == ' ') {
+      contentStart++;
+    }
+    int contentEnd = contentStart;
+    while (contentEnd < length && is[contentEnd] != '\n') {
+      contentEnd++;
+    }
+    // Do NOT trim inner here — TagToken.parse() calls handleTrim() which detects
+    // a leading '-' for left-trim whitespace control and a trailing '-' for
+    // right-trim. Trimming here would strip those control characters before
+    // TagToken ever sees them.
+    // Also do not insert a space before the content when it starts with the
+    // trim char '-', as that space would prevent handleTrim from detecting it.
+    String inner = String.valueOf(is, contentStart, contentEnd - contentStart);
+    String prefix = (inner.length() > 0 && inner.charAt(0) == symbols.getTrimChar())
+      ? symbols.getExpressionStartWithTag()
+      : symbols.getExpressionStartWithTag() + " ";
+    String syntheticImage = prefix + inner + " " + symbols.getExpressionEndWithTag();
+
+    // Consume the newline that terminates the statement line, if present.
+    int next = contentEnd;
+    if (next < length && is[next] == '\n') {
+      next++;
+      currLine++;
+      lastNewlinePos = next;
+    }
+
+    // When lstrip_blocks is active, Python Jinja2 also consumes any blank lines
+    // that follow a line statement (lines containing only horizontal whitespace).
+    // This prevents blank lines between consecutive line statements from
+    // appearing in the output.
+    if (config.isLstripBlocks()) {
+      while (next < length) {
+        // Scan forward past any horizontal whitespace on this line.
+        int lineEnd = next;
+        while (lineEnd < length && (is[lineEnd] == ' ' || is[lineEnd] == '\t')) {
+          lineEnd++;
+        }
+        // If we hit a newline (blank or whitespace-only line), consume it.
+        if (lineEnd < length && is[lineEnd] == '\n') {
+          next = lineEnd + 1;
+          currLine++;
+          lastNewlinePos = next;
+        } else {
+          // Hit real content or end of input — stop consuming.
+          break;
+        }
+      }
+    }
+
+    tokenStart = next;
+    currPost = next;
+
+    Token stmtToken = Token.newToken(
+      symbols.getTag(),
+      symbols,
+      whitespaceControlParser,
+      syntheticImage,
+      statementLine,
+      1
+    );
+    if (pending != null) {
+      // Text before the statement goes out first; the tag follows on the next call.
+      pendingToken = stmtToken;
+      return pending;
+    }
+    return stmtToken;
+  }
+
+ /**
+ * Handles a line comment prefix.
+ *
+ * <p>Line comments match anywhere on a line (not just at the start).
+ * For mid-line comments, everything from the prefix to end of line is
+ * stripped; the text before the prefix on the same line is kept.
+ *
+ * <p>Confirmed Python Jinja2 semantics:
+ * <ul>
+ * <li>Plain {@code %#}: comment content stripped, own trailing
+ * {@code \n} kept. Replaces the comment (and anything after it on
+ * the line) with a blank line / line ending.</li>
+ * <li>{@code %#-} at start of line: also strips preceding blank
+ * lines and the {@code \n} ending the last real-content line.</li>
+ * <li>{@code %#-} mid-line: behaves like plain {@code %#} — the
+ * {@code -} has nothing to left-trim when real content precedes it.</li>
+ * </ul>
+ *
+ * @return the buffered text preceding the comment if there is any, otherwise
+ * {@link #DELIMITER_MATCHED}; no token is produced for the comment itself
+ */
+ private Token handleLineComment() {
+ boolean startOfLine = isStartOfLine(currPost);
+ int afterPrefix = currPost + lineCommentPrefix.length;
+ boolean hasTrimModifier =
+ afterPrefix < length && is[afterPrefix] == symbols.getTrimChar();
+
+ int flushUpTo;
+ if (!startOfLine) {
+ // Mid-line comment: flush up to the %# prefix, stripping trailing
+ // horizontal whitespace before it (Python strips spaces/tabs before
+ // mid-line comments, e.g. "hello %# comment" → "hello").
+ int p = currPost - 1;
+ while (p >= tokenStart && (is[p] == ' ' || is[p] == '\t')) {
+ p--;
+ }
+ flushUpTo = p + 1;
+ } else if (hasTrimModifier) {
+ // Start-of-line %#-: strip preceding blank lines and the real-content \n.
+ flushUpTo = lineIndentStartSkippingBlanks(currPost);
+ } else {
+ // Start-of-line %#: strip only the current line's indentation.
+ flushUpTo = lineIndentStart(currPost);
+ }
+
+ Token pending = flushTextBefore(flushUpTo);
+
+ // Advance past the comment content to the end of the line.
+ int end = afterPrefix;
+ while (end < length && is[end] != '\n') {
+ end++;
+ }
+
+ // Both %# and %#- keep the trailing \n — it appears in the output.
+ // Scanning resumes AT the newline, so the main loop counts it as usual.
+ tokenStart = end;
+ currPost = end;
+
+ return (pending != null) ? pending : DELIMITER_MATCHED;
+ }
+
+  /**
+   * Walks backwards from {@code pos} over spaces and tabs and returns the index of
+   * the first character of that whitespace run. When {@code pos} sits at the start
+   * of a line (after optional indentation) this is the position just after the
+   * preceding newline, or 0 at the start of the input. Used to keep leading
+   * horizontal whitespace out of the text flushed before a line prefix.
+   *
+   * @param pos position of the line prefix
+   * @return index where the run of spaces/tabs immediately before {@code pos} begins
+   */
+  private int lineIndentStart(int pos) {
+    int start = pos;
+    while (start > 0 && (is[start - 1] == ' ' || is[start - 1] == '\t')) {
+      start--;
+    }
+    return start;
+  }
+
+ /**
+ * Returns the flush boundary for a {@code %#-} line comment.
+ *
+ * <p>Python Jinja2 semantics for {@code %#-}: strip back through any preceding
+ * blank lines AND the {@code \n} that ends the last real-content line, so that
+ * the comment's own kept {@code \n} becomes the sole separator. Stops at
+ * {@code tokenStart} so that {@code \n}s produced by preceding line statements
+ * or plain {@code %#} comments are not consumed.
+ *
+ * <p>Examples (| marks the flush boundary):
+ * <pre>
+ * "A\n\n%#-" → flush "A|" → output "A" + comment's \n
+ * "%% set\n%#-" → flush nothing → output comment's \n (tokenStart guard)
+ * </pre>
+ *
+ * @param pos position of the line comment prefix
+ * @return index just past the last non-blank character found at or after
+ * {@code tokenStart}, or {@code tokenStart} when only blank lines precede
+ * {@code pos}
+ */
+ private int lineIndentStartSkippingBlanks(int pos) {
+ int p = pos - 1;
+ while (p >= tokenStart) {
+ // Skip trailing horizontal whitespace on this line (going backwards).
+ while (p >= tokenStart && (is[p] == ' ' || is[p] == '\t')) {
+ p--;
+ }
+ if (p < tokenStart) {
+ break;
+ }
+ if (is[p] == '\n') {
+ // Blank line — consume this \n and keep scanning backwards.
+ p--;
+ } else {
+ // Real content at position p. The \n ending this line is at p+1.
+ // Return p+1 so flushTextBefore(p+1) flushes up to but NOT including
+ // that \n, stripping it from the output.
+ return p + 1;
+ }
+ }
+ // Reached tokenStart without finding real content — all blank lines were
+ // preceded by a line statement or plain comment. Preserve them.
+ return tokenStart;
+ }
+
+  // ── Single-slot buffer for a deferred line-statement tag ─────────────────
+  // handleLineStatement() may have both buffered text and a synthetic tag to hand
+  // out. The text is returned first; the tag waits here until the next
+  // computeNext() call.
+  private Token pendingToken = null;
+
+  /**
+   * Supplies the next token to the iterator: a stashed token if one is waiting,
+   * otherwise whatever the scanning loop produces.
+   *
+   * @return the next token, or the end-of-data marker when scanning yields nothing
+   */
+  @Override
+  protected Token computeNext() {
+    Token stashed = pendingToken;
+    if (stashed != null) {
+      pendingToken = null;
+      return stashed;
+    }
+
+    Token scanned = getNextToken();
+    return scanned == null ? endOfData() : scanned;
+  }
+
+ // ── Helpers ───────────────────────────────────────────────────────────────
+
+  /**
+   * Tells whether only spaces and tabs lie between {@code pos} and the beginning of
+   * its line. The leading whitespace is always skipped, independent of any
+   * configuration setting.
+   *
+   * @param pos position to test
+   * @return {@code true} when, after skipping spaces/tabs backwards from
+   *     {@code pos}, the scan reaches the start of the input or a newline
+   */
+  private boolean isStartOfLine(int pos) {
+    int lineStart = pos;
+    while (lineStart > 0 && (is[lineStart - 1] == ' ' || is[lineStart - 1] == '\t')) {
+      lineStart--;
+    }
+    return lineStart == 0 || is[lineStart - 1] == '\n';
+  }
+
+  /**
+   * Turns the un-emitted region {@code is[tokenStart..upTo)} into a text token.
+   * {@code tokenStart} is left untouched; callers move it themselves afterwards.
+   *
+   * @param upTo exclusive end of the region to flush
+   * @return the text token, or {@code null} when the region is empty
+   */
+  private Token flushTextBefore(int upTo) {
+    if (upTo <= tokenStart) {
+      return null;
+    }
+    lastStart = tokenStart;
+    tokenLength = upTo - tokenStart;
+    return emitToken(symbols.getFixed());
+  }
+
+  /**
+   * Maps a block kind to its closing delimiter.
+   *
+   * @param currentKind the kind of the currently open block
+   * @return the variable, tag or comment closer matching {@code currentKind}, or
+   *     {@code null} when the kind is none of those
+   */
+  private char[] closingDelimFor(int currentKind) {
+    char[] closer = null;
+    if (currentKind == symbols.getExprStart()) {
+      closer = varEnd;
+    } else if (currentKind == symbols.getTag()) {
+      closer = blkEnd;
+    } else if (currentKind == symbols.getNote()) {
+      closer = cmtEnd;
+    }
+    return closer;
+  }
+
+ /**
+ * Constructs a token from {@code lastStart}/{@code tokenLength}, then applies
+ * trimBlocks and raw-mode post-processing.
+ * NOTE(review): intended to mirror the char-based scanner's post-processing —
+ * that scanner's body is not visible here, so confirm against it.
+ *
+ * @param kind the token kind to build
+ * @return the built token; while raw mode is active, a non-text token other than
+ * the {@code raw}/{@code endraw} tags is re-created as a fixed (text) token
+ * with the same image
+ */
+ private Token emitToken(int kind) {
+ Token t = Token.newToken(
+ kind,
+ symbols,
+ whitespaceControlParser,
+ String.valueOf(is, lastStart, tokenLength),
+ currLine,
+ lastStart - lastNewlinePos + 1
+ );
+
+ // trimBlocks: swallow a newline that directly follows a tag or comment.
+ // NOTE(review): the swallowed newline is skipped without incrementing currLine,
+ // and the main loop never visits it — confirm line numbers stay correct.
+ if (
+ (t instanceof TagToken || t instanceof NoteToken) &&
+ config.isTrimBlocks() &&
+ currPost < length &&
+ is[currPost] == '\n'
+ ) {
+ lastNewlinePos = currPost + 1;
+ ++currPost;
+ ++tokenStart;
+ }
+
+ // The raw / endraw tags switch raw mode on and off and are emitted as tags.
+ if (t instanceof TagToken) {
+ TagToken tt = (TagToken) t;
+ if ("raw".equals(tt.getTagName())) {
+ inRaw = 1;
+ return tt;
+ } else if ("endraw".equals(tt.getTagName())) {
+ inRaw = 0;
+ return tt;
+ }
+ }
+
+ // Inside raw mode every other non-text token is downgraded to plain text.
+ if (inRaw > 0 && t.getType() != symbols.getFixed()) {
+ return Token.newToken(
+ symbols.getFixed(),
+ symbols,
+ whitespaceControlParser,
+ t.image,
+ currLine,
+ lastStart - lastNewlinePos + 1
+ );
+ }
+
+ return t;
+ }
+
+  /**
+   * Emits whatever remains at end-of-input and sets {@code tokenStart = currPost}
+   * so that subsequent calls to the scanning loop return {@code null}.
+   *
+   * <p>Trailing plain text covers {@code is[tokenStart .. currPost)}. When the input
+   * ends inside an unterminated variable/tag block or comment, the token starts at
+   * {@code blockOpenerStart} instead: {@code tokenStart} already points past the
+   * opening delimiter at that stage, so using it would drop the opener from the
+   * image, unlike the closer paths, whose images also begin at the opener.
+   *
+   * @return a note token for an unterminated comment, an {@link UnclosedToken} for
+   *     an unterminated variable/tag block, otherwise a fixed (text) token
+   */
+  private Token getEndToken() {
+    boolean insideUnclosed = inComment > 0 || inBlock > 0;
+    lastStart = insideUnclosed ? blockOpenerStart : tokenStart;
+    tokenLength = currPost - lastStart;
+    tokenStart = currPost;
+
+    String image = String.valueOf(is, lastStart, tokenLength);
+    int column = lastStart - lastNewlinePos + 1;
+
+    // Comment state takes precedence over block state, as before.
+    if (inComment == 0 && inBlock > 0) {
+      return new UnclosedToken(image, currLine, column, symbols, whitespaceControlParser);
+    }
+
+    int type = inComment > 0 ? symbols.getNote() : symbols.getFixed();
+    return Token.newToken(type, symbols, whitespaceControlParser, image, currLine, column);
+  }
+
+  /**
+   * Tests whether the input contains {@code pattern} starting at {@code pos}.
+   *
+   * @param pos index into the input at which the match must begin
+   * @param pattern characters to compare against
+   * @return {@code true} if {@code pattern} fits within the remaining input and
+   *     every character matches; {@code false} otherwise
+   */
+  private boolean regionMatches(int pos, char[] pattern) {
+    if (pattern.length > length - pos) {
+      return false;
+    }
+    int matched = 0;
+    while (matched < pattern.length && is[pos + matched] == pattern[matched]) {
+      matched++;
+    }
+    return matched == pattern.length;
+  }
+}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java
new file mode 100644
index 000000000..242abd241
--- /dev/null
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java
@@ -0,0 +1,269 @@
+/**********************************************************************
+ * Copyright (c) 2014 HubSpot Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **********************************************************************/
+package com.hubspot.jinjava.tree.parse;
+
+/**
+ * A {@link TokenScannerSymbols} implementation that supports arbitrary multi-character
+ * delimiter strings, addressing
+ * issue #195.
+ *
+ * Unlike {@link DefaultTokenScannerSymbols}, which is constrained to single-character
+ * prefixes and postfixes, this class allows any non-empty string for each of the six
+ * delimiter roles. The delimiters do not need to share a common prefix character.
+ *
+ * <p>{@link com.hubspot.jinjava.tree.TreeParser} detects this class via
+ * {@link #isStringBased()} and selects the string-matching {@link StringTokenScanner}.
+ * {@link ExpressionToken}, {@link TagToken}, and {@link NoteToken} use the delimiter
+ * strings and length accessors on {@link TokenScannerSymbols} (e.g.
+ * {@link #getExpressionStart()}, {@link #getTagStartLength()}) to strip delimiters
+ * correctly regardless of length.
+ *
+ * <p>The single-character abstract methods inherited from {@link TokenScannerSymbols}
+ * return Unicode Private-Use-Area sentinel values (except the newline and trim
+ * characters, which are the real {@code '\n'} and {@code '-'}). The sentinels are used
+ * only as token-kind discriminators inside {@link Token#newToken} and must never be
+ * used for scanning template text.
+ *
+ * <p>Example:
+ * <pre>{@code
+ * JinjavaConfig config = JinjavaConfig.newBuilder()
+ * .withTokenScannerSymbols(StringTokenScannerSymbols.builder()
+ * .withVariableStartString("\\VAR{")
+ * .withVariableEndString("}")
+ * .withBlockStartString("\\BLOCK{")
+ * .withBlockEndString("}")
+ * .withCommentStartString("\\#{")
+ * .withCommentEndString("}")
+ * .build())
+ * .build();
+ * }</pre>
+ */
+public class StringTokenScannerSymbols extends TokenScannerSymbols {
+
+ private static final long serialVersionUID = 1L;
+
+ // ── Internal sentinel chars ────────────────────────────────────────────────
+ // Unicode Private Use Area values — not expected to appear in real template
+ // text. They let Token.newToken()'s if-chain dispatch to the right Token
+ // subclass; the scanner never matches template characters against them.
+ static final char SENTINEL_FIXED = '\uE000';
+ static final char SENTINEL_NOTE = '\uE001';
+ static final char SENTINEL_TAG = '\uE002';
+ static final char SENTINEL_EXPR_START = '\uE003';
+ static final char SENTINEL_EXPR_END = '\uE004';
+ static final char SENTINEL_PREFIX = '\uE005'; // unused for scanning
+ static final char SENTINEL_POSTFIX = '\uE006'; // unused for scanning
+ // The next two are real characters, not Private Use Area sentinels.
+ static final char SENTINEL_NEWLINE = '\n'; // real newline for line tracking
+ static final char SENTINEL_TRIM = '-'; // real trim char
+
+ // ── The configured string delimiters ──────────────────────────────────────
+ // Copied from the Builder in the constructor; never reassigned afterwards.
+ private final String variableStartString;
+ private final String variableEndString;
+ private final String blockStartString;
+ private final String blockEndString;
+ private final String commentStartString;
+ private final String commentEndString;
+ // Optional; null means disabled.
+ private final String lineStatementPrefix;
+ private final String lineCommentPrefix;
+
+  /**
+   * Copies the delimiter configuration out of {@code builder}. Instances are only
+   * created through {@link Builder#build()}.
+   *
+   * @param builder the builder holding the configured delimiter strings
+   */
+  private StringTokenScannerSymbols(Builder builder) {
+    // Opening delimiters.
+    variableStartString = builder.variableStartString;
+    blockStartString = builder.blockStartString;
+    commentStartString = builder.commentStartString;
+    // Closing delimiters.
+    variableEndString = builder.variableEndString;
+    blockEndString = builder.blockEndString;
+    commentEndString = builder.commentEndString;
+    // Optional line-oriented prefixes (may be null).
+    lineStatementPrefix = builder.lineStatementPrefix;
+    lineCommentPrefix = builder.lineCommentPrefix;
+  }
+
+ // ── Abstract char contract ─────────────────────────────────────────────────
+ // Each getter returns the matching SENTINEL_* constant. All are Private Use Area
+ // placeholders except getNewlineChar() and getTrimChar(), which return the real
+ // '\n' and '-' characters.
+
+ @Override
+ public char getPrefixChar() {
+ return SENTINEL_PREFIX;
+ }
+
+ @Override
+ public char getPostfixChar() {
+ return SENTINEL_POSTFIX;
+ }
+
+ @Override
+ public char getFixedChar() {
+ return SENTINEL_FIXED;
+ }
+
+ @Override
+ public char getNoteChar() {
+ return SENTINEL_NOTE;
+ }
+
+ @Override
+ public char getTagChar() {
+ return SENTINEL_TAG;
+ }
+
+ @Override
+ public char getExprStartChar() {
+ return SENTINEL_EXPR_START;
+ }
+
+ @Override
+ public char getExprEndChar() {
+ return SENTINEL_EXPR_END;
+ }
+
+ // Real newline character (not a placeholder).
+ @Override
+ public char getNewlineChar() {
+ return SENTINEL_NEWLINE;
+ }
+
+ // Real trim character '-' (not a placeholder).
+ @Override
+ public char getTrimChar() {
+ return SENTINEL_TRIM;
+ }
+
+ // ── String-level getters: MUST override the base-class lazy cache ──────────
+ // The base class builds these from the char methods above, which would produce
+ // garbage sentinel strings. We override them to return the real delimiters so
+ // that ExpressionToken, TagToken, and NoteToken strip content correctly.
+
+ /** Returns the configured variable (expression) opening delimiter. */
+ @Override
+ public String getExpressionStart() {
+ return variableStartString;
+ }
+
+ /** Returns the configured variable (expression) closing delimiter. */
+ @Override
+ public String getExpressionEnd() {
+ return variableEndString;
+ }
+
+ /** Returns the configured block (tag) opening delimiter. */
+ @Override
+ public String getExpressionStartWithTag() {
+ return blockStartString;
+ }
+
+ /** Returns the configured block (tag) closing delimiter. */
+ @Override
+ public String getExpressionEndWithTag() {
+ return blockEndString;
+ }
+
+ /** Returns the configured comment opening delimiter. */
+ @Override
+ public String getOpeningComment() {
+ return commentStartString;
+ }
+
+ /** Returns the configured comment closing delimiter. */
+ @Override
+ public String getClosingComment() {
+ return commentEndString;
+ }
+
+ /** Returns the line statement prefix, or {@code null} when none was configured. */
+ @Override
+ public String getLineStatementPrefix() {
+ return lineStatementPrefix;
+ }
+
+ /** Returns the line comment prefix, or {@code null} when none was configured. */
+ @Override
+ public String getLineCommentPrefix() {
+ return lineCommentPrefix;
+ }
+
+ // ── isStringBased flag ────────────────────────────────────────────────────
+
+ /** Always {@code true}: these symbols are matched as whole delimiter strings. */
+ @Override
+ public boolean isStringBased() {
+ return true;
+ }
+
+ // ── Builder ────────────────────────────────────────────────────────────────
+
+ /** Returns a new {@link Builder} pre-populated with the default delimiters. */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+  /**
+   * Fluent builder for {@link StringTokenScannerSymbols}. The six block-style
+   * delimiters start out as the standard Jinja2 strings and must be non-empty when
+   * overridden; the two line prefixes are optional and disabled unless set.
+   */
+  public static final class Builder {
+
+    // Standard Jinja2 delimiters are the starting point for every builder.
+    private String variableStartString = "{{";
+    private String variableEndString = "}}";
+    private String blockStartString = "{%";
+    private String blockEndString = "%}";
+    private String commentStartString = "{#";
+    private String commentEndString = "#}";
+    // Line prefixes stay null (disabled) unless explicitly configured.
+    private String lineStatementPrefix;
+    private String lineCommentPrefix;
+
+    /**
+     * Sets the variable opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withVariableStartString(String s) {
+      variableStartString = requireNonEmpty(s, "variableStartString");
+      return this;
+    }
+
+    /**
+     * Sets the variable closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withVariableEndString(String s) {
+      variableEndString = requireNonEmpty(s, "variableEndString");
+      return this;
+    }
+
+    /**
+     * Sets the block (tag) opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withBlockStartString(String s) {
+      blockStartString = requireNonEmpty(s, "blockStartString");
+      return this;
+    }
+
+    /**
+     * Sets the block (tag) closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withBlockEndString(String s) {
+      blockEndString = requireNonEmpty(s, "blockEndString");
+      return this;
+    }
+
+    /**
+     * Sets the comment opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withCommentStartString(String s) {
+      commentStartString = requireNonEmpty(s, "commentStartString");
+      return this;
+    }
+
+    /**
+     * Sets the comment closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withCommentEndString(String s) {
+      commentEndString = requireNonEmpty(s, "commentEndString");
+      return this;
+    }
+
+    /**
+     * Sets the line statement prefix (e.g. {@code "%%"}). A line that begins with
+     * this prefix, after optional indentation, is treated as a block tag, as if its
+     * content were wrapped in the configured block delimiters. Pass {@code null} to
+     * disable (default). The value is stored without validation.
+     */
+    public Builder withLineStatementPrefix(String s) {
+      lineStatementPrefix = s;
+      return this;
+    }
+
+    /**
+     * Sets the line comment prefix (e.g. {@code "%#"}). {@link StringTokenScanner}
+     * matches the prefix anywhere on a line and drops everything from it to the end
+     * of that line, keeping the line's newline. Pass {@code null} to disable
+     * (default). The value is stored without validation.
+     */
+    public Builder withLineCommentPrefix(String s) {
+      lineCommentPrefix = s;
+      return this;
+    }
+
+    /** Creates the symbols object from the current builder state. */
+    public StringTokenScannerSymbols build() {
+      return new StringTokenScannerSymbols(this);
+    }
+
+    /**
+     * Validates a mandatory delimiter.
+     *
+     * @return {@code value} unchanged when it is neither null nor empty
+     * @throws IllegalArgumentException otherwise, naming the offending setting
+     */
+    private static String requireNonEmpty(String value, String name) {
+      boolean present = value != null && !value.isEmpty();
+      if (present) {
+        return value;
+      }
+      throw new IllegalArgumentException(name + " must not be null or empty");
+    }
+  }
+}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
index a737dd96c..0c500c145 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
@@ -54,7 +54,10 @@ public int getType() {
*/
@Override
protected void parse() {
- if (image.length() < 4) {
+ int startLen = getSymbols().getTagStartLength();
+ int endLen = getSymbols().getTagEndLength();
+
+ if (image.length() < startLen + endLen) {
throw new TemplateSyntaxException(
image,
"Malformed tag token",
@@ -63,7 +66,7 @@ protected void parse() {
);
}
- content = image.substring(2, image.length() - 2);
+ content = image.substring(startLen, image.length() - endLen);
content = handleTrim(content);
int nameStart = -1, pos = 0, len = content.length();
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
index 7e53b295a..de3b6e040 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
@@ -21,6 +21,13 @@
import com.hubspot.jinjava.JinjavaConfig;
import com.hubspot.jinjava.features.BuiltInFeatures;
+/**
+ * Character-based token scanner for the standard single-character-prefix delimiter
+ * scheme (e.g. <code>{{</code>, <code>{%</code>, <code>{#</code>).
+ *
+ * <p>When {@link TokenScannerSymbols#isStringBased()} is {@code true},
+ * {@link TreeParser} uses {@link StringTokenScanner} instead.
+ */
public class TokenScanner extends AbstractIterator {
private final JinjavaConfig config;
@@ -42,6 +49,11 @@ public class TokenScanner extends AbstractIterator {
private final TokenScannerSymbols symbols;
private final WhitespaceControlParser whitespaceControlParser;
+ // When true, backslash is treated as an escape character only inside quoted
+ // string literals, matching Jinja2 behaviour. When false (legacy default),
+ // the scanner consumes backslash + next char unconditionally.
+ private final boolean backslashInQuotesOnly;
+
public TokenScanner(String input, JinjavaConfig config) {
this.config = config;
@@ -64,6 +76,7 @@ public TokenScanner(String input, JinjavaConfig config) {
config.getLegacyOverrides().isParseWhitespaceControlStrictly()
? WhitespaceControlParser.STRICT
: WhitespaceControlParser.LENIENT;
+ backslashInQuotesOnly = config.getLegacyOverrides().isHandleBackslashInQuotesOnly();
}
private Token getNextToken() {
@@ -75,10 +88,14 @@ private Token getNextToken() {
}
if (inBlock > 0) {
- if (c == '\\') {
+ if (c == '\\' && !backslashInQuotesOnly) {
++currPost;
continue;
} else if (inQuote != 0) {
+ if (c == '\\') {
+ ++currPost;
+ continue;
+ }
if (inQuote == c) {
inQuote = 0;
}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScannerSymbols.java b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScannerSymbols.java
index 771dbda41..638220853 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScannerSymbols.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScannerSymbols.java
@@ -129,4 +129,90 @@ public static boolean isNoteTagOrExprChar(TokenScannerSymbols symbols, char c) {
c == symbols.getNote() || c == symbols.getTag() || c == symbols.getExprStartChar()
);
}
+
+ // ── New API ────────────────────────────────────────────────────────────────
+
+  /**
+   * Returns {@code true} if this instance uses arbitrary string delimiters that
+   * require the string-matching scan path in {@link StringTokenScanner} rather
+   * than the char-based {@link TokenScanner}.
+   *
+   * <p>The default returns {@code false}, so all existing subclasses are unaffected.
+   * {@link StringTokenScannerSymbols} overrides this to return {@code true}.
+   */
+  public boolean isStringBased() {
+    return false;
+  }
+
+  /**
+   * Returns the length of the variable/expression opening delimiter (e.g. 2 for
+   * <code>"{{"</code>). {@link ExpressionToken#parse()} uses it instead of the
+   * hardcoded constant 2.
+   *
+   * @return the character count of {@link #getExpressionStart()}
+   */
+  public int getExpressionStartLength() {
+    final String opener = getExpressionStart();
+    return opener.length();
+  }
+
+  /**
+   * Returns the length of the variable/expression closing delimiter (e.g. 2 for
+   * <code>"}}"</code>). {@link ExpressionToken#parse()} uses it instead of the
+   * hardcoded constant 2.
+   *
+   * @return the character count of {@link #getExpressionEnd()}
+   */
+  public int getExpressionEndLength() {
+    final String closer = getExpressionEnd();
+    return closer.length();
+  }
+
+  /**
+   * Returns the length of the block/tag opening delimiter (e.g. 2 for
+   * <code>"{%"</code>). {@link TagToken#parse()} uses it instead of the
+   * hardcoded constant 2.
+   *
+   * @return the character count of {@link #getExpressionStartWithTag()}
+   */
+  public int getTagStartLength() {
+    final String opener = getExpressionStartWithTag();
+    return opener.length();
+  }
+
+  /**
+   * Returns the length of the block/tag closing delimiter (e.g. 2 for
+   * <code>"%}"</code>). {@link TagToken#parse()} uses it instead of the
+   * hardcoded constant 2.
+   *
+   * @return the character count of {@link #getExpressionEndWithTag()}
+   */
+  public int getTagEndLength() {
+    final String closer = getExpressionEndWithTag();
+    return closer.length();
+  }
+
+  /**
+   * Returns the length of the comment opening delimiter (e.g. 2 for
+   * <code>"{#"</code>). {@link NoteToken#parse()} uses it instead of the
+   * hardcoded constant 2.
+   *
+   * @return the character count of {@link #getOpeningComment()}
+   */
+  public int getCommentStartLength() {
+    final String opener = getOpeningComment();
+    return opener.length();
+  }
+
+  /**
+   * Returns the length of the comment closing delimiter (e.g. 2 for
+   * <code>"#}"</code>). {@link NoteToken#parse()} uses it instead of the
+   * hardcoded constant 2.
+   *
+   * @return the character count of {@link #getClosingComment()}
+   */
+  public int getCommentEndLength() {
+    final String closer = getClosingComment();
+    return closer.length();
+  }
+
+  /**
+   * Optional line statement prefix (e.g. {@code "%%"}). When non-null, any line
+   * that begins with this prefix (after optional horizontal whitespace) is treated
+   * as a block tag statement, equivalent to wrapping its content in the block
+   * delimiters. Returns {@code null} by default (feature disabled).
+   *
+   * <p>Only used by {@link StringTokenScannerSymbols}; has no effect in the
+   * char-based path.
+   */
+  public String getLineStatementPrefix() {
+    return null;
+  }
+
+  /**
+   * Optional line comment prefix (e.g. {@code "%#"}). When non-null, any line
+   * that begins with this prefix (after optional horizontal whitespace) is stripped
+   * entirely from the output. Returns {@code null} by default (feature disabled).
+   *
+   * <p>Only used by {@link StringTokenScannerSymbols}; has no effect in the
+   * char-based path.
+   */
+  public String getLineCommentPrefix() {
+    return null;
+  }
}
diff --git a/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java b/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java
new file mode 100644
index 000000000..0515e4e10
--- /dev/null
+++ b/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java
@@ -0,0 +1,234 @@
+package com.hubspot.jinjava.tree.parse;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.ImmutableMap;
+import com.hubspot.jinjava.Jinjava;
+import com.hubspot.jinjava.JinjavaConfig;
+import com.hubspot.jinjava.LegacyOverrides;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import org.junit.Test;
+
+/**
+ * Tests for backslash handling inside block/variable/comment delimiters,
+ * covering both the char-based (DefaultTokenScannerSymbols) and string-based
+ * (StringTokenScannerSymbols) scanning paths, with the
+ * {@link LegacyOverrides#isHandleBackslashInQuotesOnly()} flag both off (legacy)
+ * and on (Jinja2-compatible).
+ */
+public class BackslashHandlingTest {
+
+ // ── Jinjava instances ──────────────────────────────────────────────────────
+
+ /** Char-based scanner, legacy backslash behaviour (flag = false). */
+ private static Jinjava charLegacy() {
+ return new Jinjava(
+ JinjavaConfig
+ .newBuilder()
+ .withLegacyOverrides(LegacyOverrides.newBuilder().build())
+ .build()
+ );
+ }
+
+ /** Char-based scanner, Jinja2-compatible backslash behaviour (flag = true). */
+ private static Jinjava charNew() {
+ return new Jinjava(
+ JinjavaConfig
+ .newBuilder()
+ .withLegacyOverrides(
+ LegacyOverrides.newBuilder().withHandleBackslashInQuotesOnly(true).build()
+ )
+ .build()
+ );
+ }
+
+ /** String-based scanner, legacy backslash behaviour (flag = false). */
+ private static Jinjava stringLegacy() {
+ return new Jinjava(
+ JinjavaConfig
+ .newBuilder()
+ .withTokenScannerSymbols(StringTokenScannerSymbols.builder().build())
+ .withLegacyOverrides(LegacyOverrides.newBuilder().build())
+ .build()
+ );
+ }
+
+ /** String-based scanner, Jinja2-compatible backslash behaviour (flag = true). */
+ private static Jinjava stringNew() {
+ return new Jinjava(
+ JinjavaConfig
+ .newBuilder()
+ .withTokenScannerSymbols(StringTokenScannerSymbols.builder().build())
+ .withLegacyOverrides(
+ LegacyOverrides.newBuilder().withHandleBackslashInQuotesOnly(true).build()
+ )
+ .build()
+ );
+ }
+
+ // ── Backslash inside a quoted string ──────────────────────────────────────
+ //
+ // Both legacy and new behaviour must handle escaped quotes inside strings
+ // correctly — \" should not close the string.
+
+ @Test
+ public void charLegacy_escapedQuoteInsideString() {
+ assertThat(charLegacy().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>()))
+ .isEqualTo("he said \"hi\"");
+ }
+
+ @Test
+ public void charNew_escapedQuoteInsideString() {
+ assertThat(charNew().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>()))
+ .isEqualTo("he said \"hi\"");
+ }
+
+ @Test
+ public void stringLegacy_escapedQuoteInsideString() {
+ assertThat(stringLegacy().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>()))
+ .isEqualTo("he said \"hi\"");
+ }
+
+ @Test
+ public void stringNew_escapedQuoteInsideString() {
+ assertThat(stringNew().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>()))
+ .isEqualTo("he said \"hi\"");
+ }
+
+ // ── Backslash outside a quoted string ─────────────────────────────────────
+ //
+ // Template under test: "prefix {{ x \}} suffix }}"
+ //
+ // We test the scanner token structure directly rather than going through
+ // render(), because the expression "x \..." is always a JUEL lexical error
+ // regardless of mode. What differs between modes is which token boundaries
+ // the scanner produces — and that is what we assert on.
+ //
+ // Legacy (backslashInQuotesOnly = false):
+ // Scanner consumes '\' and skips the following '}'. The first '}}' is not
+ // recognized as a closer. The block runs until the second '}}', so the
+ // token sequence is:
+ // TEXT "prefix " | EXPR "{{ x \}} suffix }}"
+ //
+ // New (backslashInQuotesOnly = true):
+ // Scanner leaves '\' untouched. The first '}}' is recognized as the closer.
+ // The token sequence is:
+ // TEXT "prefix " | EXPR "{{ x \}}" | TEXT " suffix }}"
+
+ private static final String BACKSLASH_TEMPLATE = "prefix {{ x \\}} suffix }}";
+
+ @Test
+ public void charLegacy_backslashConsumesOneDelimiterChar_blockRunsToSecondCloser() {
+ List tokens = scanAll(
+ new TokenScanner(BACKSLASH_TEMPLATE, charLegacy().getGlobalConfig())
+ );
+ assertThat(tokens).hasSize(2);
+ assertThat(tokens.get(0)).isInstanceOf(TextToken.class);
+ assertThat(tokens.get(0).image).isEqualTo("prefix ");
+ assertThat(tokens.get(1)).isInstanceOf(ExpressionToken.class);
+ assertThat(tokens.get(1).image).isEqualTo("{{ x \\}} suffix }}");
+ }
+
+ @Test
+ public void charNew_backslashIgnored_blockClosesAtFirstDelimiter() {
+ List tokens = scanAll(
+ new TokenScanner(BACKSLASH_TEMPLATE, charNew().getGlobalConfig())
+ );
+ assertThat(tokens).hasSize(3);
+ assertThat(tokens.get(0)).isInstanceOf(TextToken.class);
+ assertThat(tokens.get(0).image).isEqualTo("prefix ");
+ assertThat(tokens.get(1)).isInstanceOf(ExpressionToken.class);
+ assertThat(tokens.get(1).image).isEqualTo("{{ x \\}}");
+ assertThat(tokens.get(2)).isInstanceOf(TextToken.class);
+ assertThat(tokens.get(2).image).isEqualTo(" suffix }}");
+ }
+
+ @Test
+ public void stringLegacy_backslashConsumesOneDelimiterChar_blockRunsToSecondCloser() {
+ List tokens = scanAll(
+ new StringTokenScanner(BACKSLASH_TEMPLATE, stringLegacy().getGlobalConfig())
+ );
+ assertThat(tokens).hasSize(2);
+ assertThat(tokens.get(0)).isInstanceOf(TextToken.class);
+ assertThat(tokens.get(0).image).isEqualTo("prefix ");
+ assertThat(tokens.get(1)).isInstanceOf(ExpressionToken.class);
+ assertThat(tokens.get(1).image).isEqualTo("{{ x \\}} suffix }}");
+ }
+
+ @Test
+ public void stringNew_backslashIgnored_blockClosesAtFirstDelimiter() {
+ List tokens = scanAll(
+ new StringTokenScanner(BACKSLASH_TEMPLATE, stringNew().getGlobalConfig())
+ );
+ assertThat(tokens).hasSize(3);
+ assertThat(tokens.get(0)).isInstanceOf(TextToken.class);
+ assertThat(tokens.get(0).image).isEqualTo("prefix ");
+ assertThat(tokens.get(1)).isInstanceOf(ExpressionToken.class);
+ assertThat(tokens.get(1).image).isEqualTo("{{ x \\}}");
+ assertThat(tokens.get(2)).isInstanceOf(TextToken.class);
+ assertThat(tokens.get(2).image).isEqualTo(" suffix }}");
+ }
+
+ private static List scanAll(AbstractIterator scanner) {
+ List tokens = new ArrayList<>();
+ scanner.forEachRemaining(tokens::add);
+ return tokens;
+ }
+
+ // ── Backslash in a plain variable expression ───────────────────────────────
+ //
+ // The most common real-world case: a Windows path or similar string passed
+ // directly as a variable value. The backslash is in the *value*, not the
+ // template, so scanner behaviour is irrelevant — both modes should render
+ // identically.
+
+ @Test
+ public void backslashInVariableValueIsUnaffectedByFlag_char() {
+ ImmutableMap ctx = ImmutableMap.of("path", "C:\\Users\\foo");
+ assertThat(charLegacy().render("{{ path }}", ctx)).isEqualTo("C:\\Users\\foo");
+ assertThat(charNew().render("{{ path }}", ctx)).isEqualTo("C:\\Users\\foo");
+ }
+
+ @Test
+ public void backslashInVariableValueIsUnaffectedByFlag_string() {
+ ImmutableMap ctx = ImmutableMap.of("path", "C:\\Users\\foo");
+ assertThat(stringLegacy().render("{{ path }}", ctx)).isEqualTo("C:\\Users\\foo");
+ assertThat(stringNew().render("{{ path }}", ctx)).isEqualTo("C:\\Users\\foo");
+ }
+
+ // ── New behaviour: simple expressions are unaffected ──────────────────────
+ //
+ // Expressions with no backslash should behave identically under both modes.
+
+ @Test
+ public void charNew_simpleExpressionUnchanged() {
+ assertThat(charNew().render("{{ greeting }}", ImmutableMap.of("greeting", "hello")))
+ .isEqualTo("hello");
+ }
+
+ @Test
+ public void stringNew_simpleExpressionUnchanged() {
+ assertThat(stringNew().render("{{ greeting }}", ImmutableMap.of("greeting", "hello")))
+ .isEqualTo("hello");
+ }
+
+ // ── LegacyOverrides preset assertions ─────────────────────────────────────
+
+ @Test
+ public void allPresetDoesNotEnableNewBackslashHandling() {
+ assertThat(LegacyOverrides.ALL.isHandleBackslashInQuotesOnly()).isTrue();
+ }
+
+ @Test
+ public void threePointZeroPresetDoesNotEnableNewBackslashHandling() {
+ assertThat(LegacyOverrides.THREE_POINT_0.isHandleBackslashInQuotesOnly()).isTrue();
+ }
+
+ @Test
+ public void nonePresetKeepsLegacyBackslashHandling() {
+ assertThat(LegacyOverrides.NONE.isHandleBackslashInQuotesOnly()).isFalse();
+ }
+}
diff --git a/src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java b/src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java
new file mode 100644
index 000000000..a03fe836e
--- /dev/null
+++ b/src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java
@@ -0,0 +1,509 @@
+package com.hubspot.jinjava.tree.parse;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.hubspot.jinjava.BaseJinjavaTest;
+import com.hubspot.jinjava.Jinjava;
+import com.hubspot.jinjava.JinjavaConfig;
+import com.hubspot.jinjava.lib.filter.JoinFilterTest.User;
+import java.util.HashMap;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Rendering tests for {@link StringTokenScannerSymbols}: custom multi-character
+ * delimiters (LaTeX-style and angle-bracket style), trimBlocks/lstripBlocks,
+ * builder validation, and the line statement / line comment prefixes.
+ */
+public class StringTokenScannerSymbolsTest {
+
+  // ── Shared symbol configurations ───────────────────────────────────────────
+
+  /** LaTeX-style delimiters as used in the original issue #195 example. */
+  private static final StringTokenScannerSymbols LATEX_SYMBOLS = StringTokenScannerSymbols
+    .builder()
+    .withVariableStartString("\\VAR{")
+    .withVariableEndString("}")
+    .withBlockStartString("\\BLOCK{")
+    .withBlockEndString("}")
+    .withCommentStartString("\\#{")
+    .withCommentEndString("}")
+    .build();
+
+  /** Angle-bracket style — same delimiters as the existing CustomTokenScannerSymbolsTest. */
+  private static final StringTokenScannerSymbols ANGLE_SYMBOLS = StringTokenScannerSymbols
+    .builder()
+    .withVariableStartString("<<")
+    .withVariableEndString(">>")
+    .withBlockStartString("<%")
+    .withBlockEndString("%>")
+    .withCommentStartString("<#")
+    .withCommentEndString("#>")
+    .build();
+
+  private Jinjava latexJinjava;
+  private Jinjava angleJinjava;
+
+  @Before
+  public void setup() {
+    latexJinjava =
+      new Jinjava(
+        BaseJinjavaTest.newConfigBuilder().withTokenScannerSymbols(LATEX_SYMBOLS).build()
+      );
+    latexJinjava
+      .getGlobalContext()
+      .put("numbers", Lists.newArrayList(1L, 2L, 3L, 4L, 5L));
+
+    angleJinjava =
+      new Jinjava(
+        BaseJinjavaTest.newConfigBuilder().withTokenScannerSymbols(ANGLE_SYMBOLS).build()
+      );
+    angleJinjava
+      .getGlobalContext()
+      .put("numbers", Lists.newArrayList(1L, 2L, 3L, 4L, 5L));
+  }
+
+  // ── Plain text ─────────────────────────────────────────────────────────────
+
+  @Test
+  public void itRendersPlainText() {
+    String template = "jinjava interpreter works correctly";
+    assertThat(latexJinjava.render(template, new HashMap<>())).isEqualTo(template);
+    assertThat(angleJinjava.render(template, new HashMap<>())).isEqualTo(template);
+  }
+
+  // ── Variable expressions ───────────────────────────────────────────────────
+
+  @Test
+  public void itRendersVariablesWithLatexSymbols() {
+    assertThat(latexJinjava.render("\\VAR{ name }", ImmutableMap.of("name", "World")))
+      .isEqualTo("World");
+  }
+
+  @Test
+  public void itRendersVariablesWithAngleSymbols() {
+    assertThat(angleJinjava.render("<< name >>", ImmutableMap.of("name", "World")))
+      .isEqualTo("World");
+  }
+
+  // ── Default delimiters pass through as literal text ────────────────────────
+
+  @Test
+  public void itPassesThroughDefaultCurlyBracesAsLiteralText() {
+    // With custom delimiters, {{ }} must be treated as plain text, not expressions.
+    assertThat(
+      latexJinjava.render(
+        "{{ not a variable }} \\VAR{ name }",
+        ImmutableMap.of("name", "Jorge")
+      )
+    )
+      .isEqualTo("{{ not a variable }} Jorge");
+
+    assertThat(
+      angleJinjava.render(
+        "{{ not a variable }} << name >>",
+        ImmutableMap.of("name", "Jorge")
+      )
+    )
+      .isEqualTo("{{ not a variable }} Jorge");
+  }
+
+  // ── Block tags ─────────────────────────────────────────────────────────────
+
+  @Test
+  public void itRendersIfBlockWithLatexSymbols() {
+    assertThat(
+      latexJinjava.render(
+        "\\BLOCK{ if show }hello\\BLOCK{ endif }",
+        ImmutableMap.of("show", true)
+      )
+    )
+      .isEqualTo("hello");
+
+    assertThat(
+      latexJinjava.render(
+        "\\BLOCK{ if show }hello\\BLOCK{ endif }",
+        ImmutableMap.of("show", false)
+      )
+    )
+      .isEqualTo("");
+  }
+
+  @Test
+  public void itRendersSetBlockWithAngleSymbols() {
+    assertThat(
+      angleJinjava.render(
+        "<% set d=d | default(\"some random value\") %><< d >>",
+        new HashMap<>()
+      )
+    )
+      .isEqualTo("some random value");
+  }
+
+  // ── Comments ───────────────────────────────────────────────────────────────
+
+  @Test
+  public void itStripsCommentsWithLatexSymbols() {
+    assertThat(latexJinjava.render("before\\#{ this is ignored }after", new HashMap<>()))
+      .isEqualTo("beforeafter");
+  }
+
+  @Test
+  public void itStripsCommentsWithAngleSymbols() {
+    assertThat(angleJinjava.render("before<# this is ignored #>after", new HashMap<>()))
+      .isEqualTo("beforeafter");
+  }
+
+  // ── Filters ────────────────────────────────────────────────────────────────
+
+  @Test
+  public void itRendersFiltersWithLatexSymbols() {
+    assertThat(latexJinjava.render("\\VAR{ [1, 2, 3, 3]|union(null) }", new HashMap<>()))
+      .isEqualTo("[1, 2, 3]");
+    assertThat(
+      latexJinjava.render("\\VAR{ numbers|select('equalto', 3) }", new HashMap<>())
+    )
+      .isEqualTo("[3]");
+  }
+
+  @Test
+  public void itRendersFiltersWithAngleSymbols() {
+    assertThat(angleJinjava.render("<< [1, 2, 3, 3]|union(null) >>", new HashMap<>()))
+      .isEqualTo("[1, 2, 3]");
+    assertThat(angleJinjava.render("<< numbers|select('equalto', 3) >>", new HashMap<>()))
+      .isEqualTo("[3]");
+  }
+
+  @Test
+  public void itRendersMapFilterWithLatexSymbols() {
+    assertThat(
+      latexJinjava.render(
+        "\\VAR{ users|map(attribute='username')|join(', ') }",
+        ImmutableMap.of(
+          "users",
+          (Object) Lists.newArrayList(new User("foo"), new User("bar"))
+        )
+      )
+    )
+      .isEqualTo("foo, bar");
+  }
+
+  @Test
+  public void itRendersMapFilterWithAngleSymbols() {
+    assertThat(
+      angleJinjava.render(
+        "<< users|map(attribute='username')|join(', ') >>",
+        ImmutableMap.of(
+          "users",
+          (Object) Lists.newArrayList(new User("foo"), new User("bar"))
+        )
+      )
+    )
+      .isEqualTo("foo, bar");
+  }
+
+  // ── Delimiter characters inside string literals in expressions ─────────────
+
+  @Test
+  public void itHandlesClosingDelimiterInsideQuotedString() {
+    // The "}" inside the default string must not prematurely close \VAR{
+    assertThat(latexJinjava.render("\\VAR{ name | default(\"}\") }", new HashMap<>()))
+      .isEqualTo("}");
+  }
+
+  @Test
+  public void itHandlesClosingDelimiterInsideQuotedStringAngle() {
+    // ">>" inside a quoted string must not close the << expression
+    assertThat(angleJinjava.render("<< name | default(\">>\") >>", new HashMap<>()))
+      .isEqualTo(">>");
+  }
+
+  // ── Builder defaults produce same behaviour as DefaultTokenScannerSymbols ──
+
+  @Test
+  public void defaultBuilderBehavesLikeDefaultSymbols() {
+    Jinjava defaultJinjava = new Jinjava();
+    Jinjava stringBasedDefaultJinjava = new Jinjava(
+      JinjavaConfig
+        .newBuilder()
+        .withTokenScannerSymbols(StringTokenScannerSymbols.builder().build())
+        .build()
+    );
+    String template = "{{ greeting }}, {{ name }}!";
+    ImmutableMap<String, Object> ctx = ImmutableMap.of(
+      "greeting",
+      "Hello",
+      "name",
+      "World"
+    );
+    assertThat(stringBasedDefaultJinjava.render(template, ctx))
+      .isEqualTo(defaultJinjava.render(template, ctx));
+  }
+
+  // ── trimBlocks and lstripBlocks ────────────────────────────────────────────
+  //
+  // trimBlocks is handled in TokenScanner.emitStringToken(): when a TagToken or
+  // NoteToken is emitted and trimBlocks=true, the immediately following newline
+  // is consumed. This is equally true in the string-based path.
+  //
+  // lstripBlocks is handled in TreeParser, which operates on the token stream
+  // produced by TokenScanner. It strips leading horizontal whitespace from any
+  // TextNode that immediately precedes a TagNode. Since TreeParser is path-agnostic,
+  // lstripBlocks works identically for both char-based and string-based scanning.
+
+  @Test
+  public void itRespectsTrimBlocksWithAngleSymbols() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(ANGLE_SYMBOLS)
+        .withTrimBlocks(true)
+        .build()
+    );
+    // Without trimBlocks the newline after <% if show %> would appear in output.
+    // With trimBlocks=true it is consumed by the scanner, so the output starts
+    // directly with "hello"; the newline after "hello" is body text and is kept.
+    String result = j.render(
+      "<% if show %>\nhello\n<% endif %>",
+      ImmutableMap.of("show", true)
+    );
+    assertThat(result).isEqualTo("hello\n");
+  }
+
+  @Test
+  public void itRespectsTrimBlocksWithLatexSymbols() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(LATEX_SYMBOLS)
+        .withTrimBlocks(true)
+        .build()
+    );
+    String result = j.render(
+      "\\BLOCK{ if show }\nhello\n\\BLOCK{ endif }",
+      ImmutableMap.of("show", true)
+    );
+    assertThat(result).isEqualTo("hello\n");
+  }
+
+  @Test
+  public void itRespectsLstripBlocksWithAngleSymbols() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(ANGLE_SYMBOLS)
+        .withLstripBlocks(true)
+        .withTrimBlocks(true)
+        .build()
+    );
+    // Leading spaces before the tag are stripped by lstripBlocks (TreeParser).
+    // The newline after the tag is consumed by trimBlocks (TokenScanner).
+    String result = j.render(
+      "  <% if show %>\nhello\n  <% endif %>",
+      ImmutableMap.of("show", true)
+    );
+    assertThat(result).isEqualTo("hello\n");
+  }
+
+  @Test
+  public void itRespectsLstripBlocksWithLatexSymbols() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(LATEX_SYMBOLS)
+        .withLstripBlocks(true)
+        .withTrimBlocks(true)
+        .build()
+    );
+    String result = j.render(
+      "  \\BLOCK{ if show }\nhello\n  \\BLOCK{ endif }",
+      ImmutableMap.of("show", true)
+    );
+    assertThat(result).isEqualTo("hello\n");
+  }
+
+  @Test
+  public void builderRejectsEmptyDelimiter() {
+    assertThatThrownBy(() ->
+        StringTokenScannerSymbols.builder().withVariableStartString("").build()
+      )
+      .isInstanceOf(IllegalArgumentException.class);
+  }
+
+  @Test
+  public void builderRejectsNullDelimiter() {
+    assertThatThrownBy(() ->
+        StringTokenScannerSymbols.builder().withBlockEndString(null).build()
+      )
+      .isInstanceOf(IllegalArgumentException.class);
+  }
+
+  // ── Line statement prefix ──────────────────────────────────────────────────
+
+  @Test
+  public void itRendersLineStatementPrefix() {
+    Jinjava j = jinjavaWith(
+      StringTokenScannerSymbols.builder().withLineStatementPrefix("%%").build()
+    );
+    // "%% if show" is equivalent to "{% if show %}"
+    String template = "%% if show\nhello\n%% endif";
+    assertThat(j.render(template, ImmutableMap.of("show", true))).isEqualTo("hello\n");
+    assertThat(j.render(template, ImmutableMap.of("show", false))).isEqualTo("");
+  }
+
+  @Test
+  public void itRendersLineStatementPrefixWithWhitespaceControl() {
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(
+          StringTokenScannerSymbols.builder().withLineStatementPrefix("%%").build()
+        )
+        .withTrimBlocks(true)
+        .withLstripBlocks(true)
+        .build()
+    );
+    // "%%- for" strips the newline before the line (leftTrim).
+    // trimBlocks consumes the newline after each tag line.
+    // Expected: the \n after "before|" is stripped, "c|" is rendered three
+    // times (range(3)), and the \n after each "c|" is stripped by the leftTrim
+    // on "%%- endfor".
+    String template = "before|\n%%- for _ in range(3)\nc|\n%%- endfor\nafter";
+    assertThat(j.render(template, ImmutableMap.of())).isEqualTo("before|c|c|c|after");
+  }
+
+  @Test
+  public void itRendersLineStatementPrefixWithLeadingWhitespace() {
+    Jinjava j = jinjavaWith(
+      StringTokenScannerSymbols.builder().withLineStatementPrefix("%%").build()
+    );
+    // Leading spaces before the prefix are allowed
+    String template = "  %% if show\nhello\n  %% endif";
+    assertThat(j.render(template, ImmutableMap.of("show", true))).isEqualTo("hello\n");
+  }
+
+  @Test
+  public void itRendersLineStatementMixedWithBlockDelimiters() {
+    Jinjava j = jinjavaWith(
+      StringTokenScannerSymbols
+        .builder()
+        .withVariableStartString("<<")
+        .withVariableEndString(">>")
+        .withBlockStartString("<%")
+        .withBlockEndString("%>")
+        .withCommentStartString("<#")
+        .withCommentEndString("#>")
+        .withLineStatementPrefix("%%")
+        .build()
+    );
+    String template = "%% set x = 42\n<< x >>";
+    assertThat(j.render(template, new HashMap<>())).isEqualTo("42");
+  }
+
+  // ── Line comment prefix ────────────────────────────────────────────────────
+  //
+  // Ground truth confirmed by running both Python Jinja2 and Jinjava against:
+  //   [START]
+  //   %% set x = 1
+  //   [A]
+  //   %# plain comment
+  //   [B]
+  //   %#- trim comment
+  //   [C]
+  //   %% set y = 2
+  //   [D]
+  //   [END]
+  //
+  // Python output: [START]\n[A]\n\n[B]\n[C]\n[D]\n[END]
+  //
+  // Semantics (as asserted by the tests below):
+  //   %#  (plain): comment content stripped, trailing \n KEPT → blank line where
+  //                the comment was; the newline ending the preceding line is
+  //                unaffected.
+  //   %#- (trim):  no blank line is left where the comment was, and a blank line
+  //                directly before the comment is removed too — exactly one \n
+  //                remains between the surrounding lines.
+
+  @Test
+  public void itStripsLineCommentPrefixLeavingBlankLine() {
+    Jinjava j = jinjavaWith(
+      StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
+    );
+    // %# keeps its trailing \n → "before\n" + "\n" (comment's own \n) + "after"
+    String template = "before\n%# this whole line is a comment\nafter";
+    assertThat(j.render(template, new HashMap<>())).isEqualTo("before\n\nafter");
+  }
+
+  @Test
+  public void itStripsLineCommentWithLeadingWhitespace() {
+    Jinjava j = jinjavaWith(
+      StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
+    );
+    // Indentation before %# is stripped, trailing \n is kept → blank line
+    String template = "before\n  %# indented comment\nafter";
+    assertThat(j.render(template, new HashMap<>())).isEqualTo("before\n\nafter");
+  }
+
+  @Test
+  public void itStripsLineCommentWithTrimModifier() {
+    Jinjava j = jinjavaWith(
+      StringTokenScannerSymbols.builder().withLineCommentPrefix("%#").build()
+    );
+    // Plain %#: the comment text is removed but the line's own \n stays, which
+    // leaves a blank line in the output.
+    assertThat(j.render("before\n%# comment\nafter", new HashMap<>()))
+      .isEqualTo("before\n\nafter");
+    // %#-: no blank line is left — a single \n separates "before" and "after",
+    // so the result differs from plain %#.
+    assertThat(j.render("before\n%#- comment\nafter", new HashMap<>()))
+      .isEqualTo("before\nafter");
+    // %#- preceded by a blank line: that blank line is removed as well, so the
+    // output is the same as in the previous case.
+    assertThat(j.render("before\n\n%#- comment\nafter", new HashMap<>()))
+      .isEqualTo("before\nafter");
+  }
+
+  @Test
+  public void itStripsLineCommentWithoutLeavingBlankLine() {
+    // %#- directly after real content (no blank line in between): the comment
+    // line disappears without leaving a blank line, so a single \n separates
+    // "\\begin{document}" from "\\section*{...}". The \VAR{reportHeader} inside
+    // the comment line is never rendered.
+    Jinjava j = new Jinjava(
+      BaseJinjavaTest
+        .newConfigBuilder()
+        .withTokenScannerSymbols(
+          StringTokenScannerSymbols
+            .builder()
+            .withVariableStartString("\\VAR{")
+            .withVariableEndString("}")
+            .withLineCommentPrefix("%#")
+            .build()
+        )
+        .build()
+    );
+    String template =
+      "\\begin{document}\n%#-\\VAR{reportHeader}\n\\section*{\\VAR{title}}";
+    String result = j.render(template, ImmutableMap.of("title", "My Report"));
+    assertThat(result).isEqualTo("\\begin{document}\n\\section*{My Report}");
+  }
+
+  @Test
+  public void itHandlesBothLinePrefixesTogether() {
+    Jinjava j = jinjavaWith(
+      StringTokenScannerSymbols
+        .builder()
+        .withVariableStartString("<<")
+        .withVariableEndString(">>")
+        .withBlockStartString("<%")
+        .withBlockEndString("%>")
+        .withCommentStartString("<#")
+        .withCommentEndString("#>")
+        .withLineStatementPrefix("%%")
+        .withLineCommentPrefix("%#")
+        .build()
+    );
+    String template = "%# this is stripped\n%% set x = 7\n<< x >>";
+    // %# keeps its trailing \n → blank line, then %% set produces nothing,
+    // then << x >> renders as 7. Result: "\n7"
+    assertThat(j.render(template, new HashMap<>())).isEqualTo("\n7");
+  }
+
+  // ── Helper ────────────────────────────────────────────────────────────────
+
+  /** Builds a Jinjava instance on the base test config with the given symbols. */
+  private Jinjava jinjavaWith(StringTokenScannerSymbols symbols) {
+    return new Jinjava(
+      BaseJinjavaTest.newConfigBuilder().withTokenScannerSymbols(symbols).build()
+    );
+  }
+}
diff --git a/src/test/java/com/hubspot/jinjava/tree/parse/TokenScannerTest.java b/src/test/java/com/hubspot/jinjava/tree/parse/TokenScannerTest.java
index 4261b4fc2..2c5c1c8e4 100644
--- a/src/test/java/com/hubspot/jinjava/tree/parse/TokenScannerTest.java
+++ b/src/test/java/com/hubspot/jinjava/tree/parse/TokenScannerTest.java
@@ -8,6 +8,7 @@
import com.google.common.io.Resources;
import com.hubspot.jinjava.BaseJinjavaTest;
import com.hubspot.jinjava.JinjavaConfig;
+import com.hubspot.jinjava.LegacyOverrides;
import com.hubspot.jinjava.features.BuiltInFeatures;
import com.hubspot.jinjava.features.FeatureConfig;
import com.hubspot.jinjava.features.FeatureStrategies;
@@ -319,6 +320,13 @@ public void testLstripBlocks() {
@Test
public void itTreatsEscapedQuotesSameWhenNotInQuotes() {
+ config =
+ BaseJinjavaTest
+ .newConfigBuilder()
+ .withLegacyOverrides(
+ LegacyOverrides.newBuilder().withHandleBackslashInQuotesOnly(false).build()
+ )
+ .build();
List tokens = tokens("tag-with-all-escaped-quotes");
assertThat(tokens).hasSize(8);
assertThat(tokens.stream().map(Token::getType).collect(Collectors.toList()))