Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -9783,6 +9783,12 @@ parser_lex(pm_parser_t *parser) {
unsigned int semantic_token_seen = parser->semantic_token_seen;
parser->semantic_token_seen = true;

// We'll jump to this label when we are about to encounter an EOF.
// If we still have lex_modes on the stack, we pop them so that cleanup
// can happen. For example, we should still continue parsing after a heredoc
// identifier, even if the heredoc body was syntactically invalid.
switch_lex_modes:

switch (parser->lex_modes.current->mode) {
case PM_LEX_DEFAULT:
case PM_LEX_EMBEXPR:
Expand Down Expand Up @@ -9856,6 +9862,14 @@ parser_lex(pm_parser_t *parser) {
// We'll check if we're at the end of the file. If we are, then we
// need to return the EOF token.
if (parser->current.end >= parser->end) {
// We may be missing closing tokens. We should pop modes one by one
// to do the appropriate cleanup like moving next_start for heredocs.
// Only when no mode is remaining will we actually emit the EOF token.
if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
lex_mode_pop(parser);
goto switch_lex_modes;
}

// If we hit EOF, but the EOF came immediately after a newline,
// set the start of the token to the newline. This way any EOF
// errors will be reported as happening on that line rather than
Expand Down Expand Up @@ -15433,7 +15447,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
pm_token_t opening = parser->previous;
pm_statements_node_t *statements = NULL;

if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
Copy link
Collaborator Author

@Earlopain Earlopain Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An unclosed #{ currently gives the interpolation a statements node containing a MissingNode. That's not correct: the missing closing token is what causes the syntax error. It also messes up the locations when the parser finds the synthetic heredoc end token. I don't think there are any other tokens to consider here; hopefully these cover everything.

# Before
Prism.parse("\"\#{").value.statements.body[0]
=> 
@ InterpolatedStringNode (location: (1,0)-(1,3))
├── flags: newline
├── opening_loc: (1,0)-(1,1) = "\""
├── parts: (length: 1)
   └── @ EmbeddedStatementsNode (location: (1,1)-(1,3))
       ├── flags: 
       ├── opening_loc: (1,1)-(1,3) = "\#{"
       ├── statements:
          @ StatementsNode (location: (1,1)-(1,3))
          ├── flags: 
          └── body: (length: 1)
              └── @ MissingNode (location: (1,1)-(1,3))
                  └── flags: 
       └── closing_loc: (1,3)-(1,3) = ""
└── closing_loc: 

# After
Prism.parse("\"\#{").value.statements.body[0]
=> 
@ InterpolatedStringNode (location: (1,0)-(1,3))
├── flags: newline
├── opening_loc: (1,0)-(1,1) = "\""
├── parts: (length: 1)
   └── @ EmbeddedStatementsNode (location: (1,1)-(1,3))
       ├── flags: 
       ├── opening_loc: (1,1)-(1,3) = "\#{"
       ├── statements: 
       └── closing_loc: (1,3)-(1,3) = ""
└── closing_loc: 

pm_accepts_block_stack_push(parser, true);
statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
Expand Down
11 changes: 11 additions & 0 deletions test/prism/errors/unterminated_heredoc_and_embexpr.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<<A+B
^ unterminated heredoc; can't find string "A" anywhere before EOF
^ unexpected '+', ignoring it
^ unterminated heredoc; can't find string "A" anywhere before EOF
#{C
^ unexpected heredoc ending; expected an argument
^ unexpected heredoc ending, expecting end-of-input
^ unexpected heredoc ending, ignoring it
^ unexpected end-of-input, assuming it is closing the parent top level context
^ expected a `}` to close the embedded expression

9 changes: 9 additions & 0 deletions test/prism/errors/unterminated_heredoc_and_embexpr_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<<A+B
^ unterminated heredoc; can't find string "A" anywhere before EOF
#{C + "#{"}
^ unterminated string meets end of file
^ unexpected end-of-input, assuming it is closing the parent top level context
^ expected a `}` to close the embedded expression
^ unterminated string; expected a closing delimiter for the interpolated string
^ expected a `}` to close the embedded expression

22 changes: 22 additions & 0 deletions test/prism/errors_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,28 @@ def test_incomplete_def_closing_loc
assert_nil(statement.end_keyword)
end

# A double-quoted string that ends right after `#{` must still parse into a
# string node: the opening quote is kept, there is no closing delimiter, and
# the single embedded part has an empty closing and no statements.
def test_unclosed_interpolation
  string_node = Prism.parse_statement("\"\#{")

  # The outer string keeps its opening quote but never gets a closing one.
  assert_equal('"', string_node.opening)
  assert_nil(string_node.closing)

  assert_equal(1, string_node.parts.count)

  # The lone part is the unterminated interpolation itself.
  embedded = string_node.parts[0]
  assert_equal('#{', embedded.opening)
  assert_equal("", embedded.closing)
  assert_nil(embedded.statements)
end

# A heredoc that is never terminated and whose body leaves `#{` open must
# still parse into a node: the heredoc opening is kept, there is no closing,
# and the single embedded part has an empty closing and no statements.
def test_unclosed_heredoc_and_interpolation
  heredoc_node = Prism.parse_statement("<<D\n\#{")

  # The heredoc identifier is the opening; no terminator was ever found.
  assert_equal("<<D", heredoc_node.opening)
  assert_nil(heredoc_node.closing)

  assert_equal(1, heredoc_node.parts.count)

  # The lone part is the unterminated interpolation inside the heredoc body.
  embedded = heredoc_node.parts[0]
  assert_equal('#{', embedded.opening)
  assert_equal("", embedded.closing)
  assert_nil(embedded.statements)
end

private

def assert_errors(filepath, version)
Expand Down
53 changes: 50 additions & 3 deletions test/prism/lex_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,58 @@ def test_parse_lex_file
end

if RUBY_VERSION >= "3.3"
def test_lex_compare
prism = Prism.lex_compat(File.read(__FILE__), version: "current").value
ripper = Ripper.lex(File.read(__FILE__))
def test_lex_compat
source = "foo bar"
prism = Prism.lex_compat(source, version: "current").value
ripper = Ripper.lex(source)
assert_equal(ripper, prism)
end
end

# Lexing a string whose interpolation is opened but never closed: the token
# stream is expected to end with EOF directly after EMBEXPR_BEGIN, or after
# an IGNORED_NEWLINE when the source ends with a newline.
def test_lex_interpolation_unterminated
  source = '"#{'

  expected = %i[STRING_BEGIN EMBEXPR_BEGIN EOF]
  assert_equal(expected, token_types(source))

  expected_with_newline = %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF]
  assert_equal(expected_with_newline, token_types(source + "\n"))
end

# Lexing a string whose interpolation contains a constant but is never
# closed, with and without a trailing newline.
def test_lex_interpolation_unterminated_with_content
  source = '"#{C'

  # FIXME: EOF is emitted twice when the source has no trailing newline.
  expected = %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF]
  assert_equal(expected, token_types(source))

  expected_with_newline = %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF]
  assert_equal(expected_with_newline, token_types(source + "\n"))
end

# Lexes a heredoc that is never terminated and whose body leaves an
# interpolation open, once as-is and once with a trailing newline appended.
# In both expected token lists HEREDOC_END appears before the remainder of
# the first source line (PLUS, CONSTANT) is lexed.
def test_lex_heredoc_unterminated
code = <<~'RUBY'.strip
<<A+B
#{C
RUBY

# Source ends directly after the constant inside the open interpolation.
assert_equal(
%i[HEREDOC_START EMBEXPR_BEGIN CONSTANT HEREDOC_END PLUS CONSTANT NEWLINE EOF],
token_types(code)
)

# Same source with a trailing newline: an extra NEWLINE token is expected
# before HEREDOC_END.
assert_equal(
%i[HEREDOC_START EMBEXPR_BEGIN CONSTANT NEWLINE HEREDOC_END PLUS CONSTANT NEWLINE EOF],
token_types(code + "\n")
)
end

# Lexes +code+ with Prism.lex and returns the type of each token, in order.
# Each entry of the lex result's value is treated as a pair whose first
# element is the token; the second element is ignored.
def token_types(code)
  lexed_entries = Prism.lex(code).value
  lexed_entries.map { |entry| entry.first.type }
end
end
end