From b98494db2332e1f7877d098914fc4f4a01a9f66c Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Thu, 12 Feb 2026 16:57:14 +0100
Subject: [PATCH 1/2] Run tests against some bundled gems

They make extensive use of prism.
---
 .github/workflows/cruby-bindings.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/cruby-bindings.yml b/.github/workflows/cruby-bindings.yml
index d635e550f0..e9cd55d8f6 100644
--- a/.github/workflows/cruby-bindings.yml
+++ b/.github/workflows/cruby-bindings.yml
@@ -57,3 +57,7 @@ jobs:
           EXCLUDES: ${{ matrix.parser == 'parse.y' && './test/.excludes-parsey' }}
         run: make -j$(nproc) -s test-all
         working-directory: ruby/ruby
+      - name: make test-bundled-gems
+        if: ${{ matrix.parser == 'prism' }}
+        run: make -j$(nproc) -s test-bundled-gems BUNDLED_GEMS=irb,rbs,rdoc,syntax_suggest
+        working-directory: ruby/ruby

From 82d953fcad3dd64fb8cc53f34f0b02b245b573c5 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:03:49 +0100
Subject: [PATCH 2/2] Fix lexing for unterminated strings/heredocs etc.

When we hit EOF and still have lex modes left, it means some content was unterminated.
Heredocs in particular have logic that needs to run once the body has finished lexing.
If we don't reset the mode back to how it was before, the lexer will not continue at the correct place.
---
 src/prism.c                                   |  9 ++++
 .../unterminated_heredoc_and_embexpr.txt      | 11 ++++
 .../unterminated_heredoc_and_embexpr_2.txt    |  9 ++++
 test/prism/lex_test.rb                        | 53 +++++++++++++++++--
 4 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100644 test/prism/errors/unterminated_heredoc_and_embexpr.txt
 create mode 100644 test/prism/errors/unterminated_heredoc_and_embexpr_2.txt

diff --git a/src/prism.c b/src/prism.c
index 34e5d38b0a..097b4c7305 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -9856,6 +9856,15 @@ parser_lex(pm_parser_t *parser) {
             // We'll check if we're at the end of the file. If we are, then we
             // need to return the EOF token.
             if (parser->current.end >= parser->end) {
+                // We may be missing closing tokens. We should pop modes one by one
+                // to do the appropriate cleanup like moving next_start for heredocs.
+                // Only when no mode is remaining will we actually emit the EOF token.
+                if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
+                    lex_mode_pop(parser);
+                    parser_lex(parser);
+                    return;
+                }
+
                 // If we hit EOF, but the EOF came immediately after a newline,
                 // set the start of the token to the newline.  This way any EOF
                 // errors will be reported as happening on that line rather than
diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr.txt b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
new file mode 100644
index 0000000000..bed7fcd24e
--- /dev/null
+++ b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
@@ -0,0 +1,11 @@
+<<A+B
+  ^ unterminated heredoc; can't find string "A" anywhere before EOF
+   ^ unexpected '+', ignoring it
+  ^ unterminated heredoc; can't find string "A" anywhere before EOF
+#{C
+   ^ unexpected heredoc ending; expected an argument
+   ^ unexpected heredoc ending, expecting end-of-input
+   ^ unexpected heredoc ending, ignoring it
+   ^ unexpected end-of-input, assuming it is closing the parent top level context
+^ expected a `}` to close the embedded expression
+
diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt b/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt
new file mode 100644
index 0000000000..a03ff1d212
--- /dev/null
+++ b/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt
@@ -0,0 +1,9 @@
+<<A+B
+  ^ unterminated heredoc; can't find string "A" anywhere before EOF
+#{C + "#{"}
+           ^ unterminated string meets end of file
+           ^ unexpected end-of-input, assuming it is closing the parent top level context
+           ^ expected a `}` to close the embedded expression
+      ^ unterminated string; expected a closing delimiter for the interpolated string
+           ^ expected a `}` to close the embedded expression
+
diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
index 9a9f203c28..8ea7ce7e9b 100644
--- a/test/prism/lex_test.rb
+++ b/test/prism/lex_test.rb
@@ -48,11 +48,58 @@ def test_parse_lex_file
     end
 
     if RUBY_VERSION >= "3.3"
-      def test_lex_compare
-        prism = Prism.lex_compat(File.read(__FILE__), version: "current").value
-        ripper = Ripper.lex(File.read(__FILE__))
+      def test_lex_compat
+        source = "foo bar"
+        prism = Prism.lex_compat(source, version: "current").value
+        ripper = Ripper.lex(source)
         assert_equal(ripper, prism)
       end
     end
+
+    def test_lex_interpolation_unterminated
+      assert_equal(
+        %i[STRING_BEGIN EMBEXPR_BEGIN EOF],
+        token_types('"#{')
+      )
+
+      assert_equal(
+        %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF],
+        token_types('"#{' + "\n")
+      )
+    end
+
+    def test_lex_interpolation_unterminated_with_content
+      # FIXME: Emits EOF twice.
+      assert_equal(
+        %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF],
+        token_types('"#{C')
+      )
+
+      assert_equal(
+        %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF],
+        token_types('"#{C' + "\n")
+      )
+    end
+
+    def test_lex_heredoc_unterminated
+      code = <<~'RUBY'.strip
+        <<A+B
+        #{C
+      RUBY
+
+      assert_equal(
+        %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT HEREDOC_END PLUS CONSTANT NEWLINE EOF],
+        token_types(code)
+      )
+
+      assert_equal(
+        %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT NEWLINE HEREDOC_END PLUS CONSTANT NEWLINE EOF],
+        token_types(code + "\n")
+      )
+    end
+
+    def token_types(code)
+      Prism.lex(code).value.map { |token, _state| token.type }
+    end
   end
 end