diff --git a/.github/workflows/jruby.yml b/.github/workflows/jruby.yml new file mode 100644 index 000000000..651e33236 --- /dev/null +++ b/.github/workflows/jruby.yml @@ -0,0 +1,65 @@ +name: JRuby + +on: + push: + branches: + - master + pull_request: + paths: + - ".github/workflows/jruby.yml" + - "include/**" + - "src/**" + - "wasm/**" + - "lib/rbs/wasm/**" + - "lib/rbs.rb" + - "Rakefile" + +permissions: + contents: read + +env: + # Keep in sync with .github/workflows/wasm.yml. + WASI_SDK_VERSION: "33" + WASI_SDK_RELEASE: "33.0" + +jobs: + test: + name: jruby + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - run: git fetch --depth=1 origin +refs/tags/*:refs/tags/* + + # Build the .wasm and fetch the Chicory jars with CRuby + the WASI SDK, + # then run RBS itself on JRuby against those artifacts. + - name: Set up Ruby (to assemble the WebAssembly runtime) + uses: ruby/setup-ruby@v1 + with: + ruby-version: ruby + bundler: none + - name: Update rubygems & bundler + run: gem update --system + - name: Install gems + run: | + bundle config set --local without libs:profilers + bundle install --jobs 4 --retry 3 + - name: Install the WASI SDK + run: | + url="https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VERSION}/wasi-sdk-${WASI_SDK_RELEASE}-x86_64-linux.tar.gz" + mkdir -p "$HOME/wasi-sdk" + curl -sSL "$url" | tar xz --strip-components=1 -C "$HOME/wasi-sdk" + echo "WASI_SDK_PATH=$HOME/wasi-sdk" >> "$GITHUB_ENV" + - name: Assemble the JRuby runtime (rbs_parser.wasm + Chicory jars) + run: bundle exec rake wasm:jruby_setup + + - name: Set up JRuby + uses: ruby/setup-ruby@v1 + with: + ruby-version: jruby + bundler: none + - name: Install runtime and test gems + run: gem install prism test-unit --no-document + - name: Run RBS's parser on JRuby + run: | + jruby -Ilib -Itest test/rbs/wasm/jruby_parser_test.rb + jruby -Ilib -Itest test/rbs/parser_test.rb diff --git a/.gitignore b/.gitignore index e7e523cd4..93c5b4a5c 100644 --- 
a/.gitignore +++ b/.gitignore @@ -32,3 +32,7 @@ rust/ruby-rbs/vendor/rbs/ # Compiled WebAssembly module (built by rake wasm:build) wasm/*.wasm + +# JRuby runtime artifacts (assembled by rake wasm:jruby_setup, bundled in the JRuby gem) +lib/rbs/wasm/*.wasm +lib/rbs/wasm/jars/ diff --git a/Rakefile b/Rakefile index 9cf0e66cc..9b46e77b3 100644 --- a/Rakefile +++ b/Rakefile @@ -609,16 +609,52 @@ namespace :wasm do task :check => :build do wasmtime = ENV["WASMTIME"] || "wasmtime" - # `rbs_wasm_selftest` parses a small fixed signature and returns 0 on + # `rbs_wasm_selftest` parses a small fixed signature and returns 1 on # success. `--invoke` prints the return value to stdout. output = IO.popen([wasmtime, "run", "--invoke", "rbs_wasm_selftest", WASM_OUTPUT], err: File::NULL, &:read).to_s.strip - if output == "0" + if output == "1" puts "WebAssembly selftest passed." else - raise "WebAssembly selftest failed: rbs_wasm_selftest returned #{output.inspect} (expected \"0\")" + raise "WebAssembly selftest failed: rbs_wasm_selftest returned #{output.inspect} (expected \"1\")" end end + + # Where the runtime looks for the module and jars by default (see + # RBS::WASM::Runtime). These are build artifacts, bundled into the JRuby gem. + JRUBY_WASM_DIR = File.expand_path("lib/rbs/wasm", __dir__) + CHICORY_VERSION = ENV.fetch("CHICORY_VERSION", "1.7.5") + # `compiler` is Chicory's AOT compiler (wasm -> JVM bytecode); the asm* jars + # are the ow2 ASM libraries it depends on. Keep ASM_VERSION in sync with what + # the pinned Chicory release declares. 
+ CHICORY_JARS = %w[wasm runtime log wasi compiler].freeze + ASM_VERSION = ENV.fetch("ASM_VERSION", "9.9.1") + ASM_JARS = %w[asm asm-tree asm-util asm-commons asm-analysis].freeze + + desc "Download the Chicory and ASM jars the JRuby runtime needs into lib/rbs/wasm/jars" + task :vendor_jars do + require "open-uri" + require "fileutils" + + jars_dir = File.join(JRUBY_WASM_DIR, "jars") + FileUtils.mkdir_p(jars_dir) + + downloads = CHICORY_JARS.map { |name| ["#{name}.jar", "https://repo1.maven.org/maven2/com/dylibso/chicory/#{name}/#{CHICORY_VERSION}/#{name}-#{CHICORY_VERSION}.jar"] } + downloads += ASM_JARS.map { |name| ["#{name}.jar", "https://repo1.maven.org/maven2/org/ow2/asm/#{name}/#{ASM_VERSION}/#{name}-#{ASM_VERSION}.jar"] } + + downloads.each do |filename, url| + puts "Downloading #{url}" + URI.open(url) { |io| File.binwrite(File.join(jars_dir, filename), io.read) } # steep:ignore + end + + puts "Vendored Chicory #{CHICORY_VERSION} + ASM #{ASM_VERSION} into #{jars_dir}" + end + + desc "Assemble everything the JRuby gem needs: the .wasm and the Chicory jars" + task :jruby_setup => [:build, :vendor_jars] do + cp WASM_OUTPUT, File.join(JRUBY_WASM_DIR, "rbs_parser.wasm") + puts "JRuby runtime is ready under #{JRUBY_WASM_DIR}" + end end namespace :rust do diff --git a/Steepfile b/Steepfile index b00e4542b..d1bc69f40 100644 --- a/Steepfile +++ b/Steepfile @@ -6,6 +6,13 @@ target :lib do ignore( "lib/rbs/test", # "lib/rbs/test.rb" + + # JRuby-only implementations of RBS::Location and RBS::Parser. Like the C + # extension, these implement interfaces already described in sig/, and + # runtime.rb is Java interop, so they are not type-checked here. 
+ "lib/rbs/wasm/location.rb", + "lib/rbs/wasm/runtime.rb", + "lib/rbs/wasm/parser.rb", ) library "pathname", "json", "logger", "monitor", "tsort", "uri", 'dbm', 'pstore', 'singleton', 'shellwords', 'fileutils', 'find', 'digest', 'prettyprint', 'yaml', "psych", "securerandom" diff --git a/include/rbs/serialize.h b/include/rbs/serialize.h index e55b7b64c..387f051bf 100644 --- a/include/rbs/serialize.h +++ b/include/rbs/serialize.h @@ -30,4 +30,10 @@ */ rbs_string_t rbs_serialize_node(rbs_allocator_t *allocator, rbs_constant_pool_t *constant_pool, rbs_node_t *node); +/** + * Like rbs_serialize_node, but for a bare node list (e.g. the result of + * rbs_parse_type_params). Decoded by RBS::WASM::Deserializer.deserialize_node_list. + */ +rbs_string_t rbs_serialize_node_list(rbs_allocator_t *allocator, rbs_constant_pool_t *constant_pool, rbs_node_list_t *list); + #endif diff --git a/lib/rbs.rb b/lib/rbs.rb index bbc8c8382..e59fd95e4 100644 --- a/lib/rbs.rb +++ b/lib/rbs.rb @@ -69,7 +69,14 @@ require "rbs/type_alias_regularity" require "rbs/collection" -require "rbs_extension" +if RUBY_ENGINE == "jruby" + # JRuby cannot load the MRI C extension. Run the parser in WebAssembly and + # provide pure-Ruby implementations of RBS::Location and RBS::Parser instead. + require "rbs/wasm/location" + require "rbs/wasm/parser" +else + require "rbs_extension" +end require "rbs/parser_aux" require "rbs/location_aux" diff --git a/lib/rbs/wasm/deserializer.rb b/lib/rbs/wasm/deserializer.rb index 4b19331a1..29e2fd53b 100644 --- a/lib/rbs/wasm/deserializer.rb +++ b/lib/rbs/wasm/deserializer.rb @@ -19,6 +19,18 @@ def self.deserialize(bytes, buffer) new(bytes, buffer).read_node end + # Deserialize a bare node list (rbs_serialize_node_list), e.g. the result + # of RBS::Parser._parse_type_params. 
+ def self.deserialize_node_list(bytes, buffer) + new(bytes, buffer).read_node_list + end + + # Deserialize the token stream produced by rbs_wasm_lex into the + # [type, location] pairs RBS::Parser._lex returns. + def self.deserialize_tokens(bytes, buffer) + new(bytes, buffer).read_tokens + end + def initialize(bytes, buffer) @bytes = bytes @buffer = buffer @@ -50,6 +62,23 @@ def read_node end end + def read_node_list + Array.new(read_count) { read_node } + end + + # The lex stream has no leading count: read records until the buffer is + # exhausted. Each is a token type name followed by its character range. + def read_tokens + tokens = [] #: Array[[ Symbol, Location ]] + until @pos >= @bytes.bytesize + type = read_string(Encoding::UTF_8).to_sym + start_char = read_i32 + end_char = read_i32 + tokens << [type, RBS::Location.new(@buffer, start_char, end_char)] + end + tokens + end + private def read_struct(entry) @@ -87,10 +116,6 @@ def read_field(reader) end end - def read_node_list - Array.new(read_count) { read_node } - end - def read_hash hash = {} #: Hash[untyped, untyped] read_count.times do diff --git a/lib/rbs/wasm/location.rb b/lib/rbs/wasm/location.rb new file mode 100644 index 000000000..6f89d6927 --- /dev/null +++ b/lib/rbs/wasm/location.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module RBS + # Pure-Ruby implementation of the primitives that back RBS::Location. + # + # On CRuby these come from the C extension (ext/rbs_extension/legacy_location.c). + # JRuby loads this instead, before rbs/location_aux.rb layers the public API on + # top, so RBS::Location behaves identically without the native extension. + class Location + attr_reader :buffer + + def initialize(buffer, start_pos, end_pos) + @buffer = buffer + @start_pos = start_pos + @end_pos = end_pos + @required_children = {} #: Hash[Symbol, [ Integer, Integer ]] + @optional_children = {} #: Hash[Symbol, [ Integer, Integer ]?] 
+ end + + def _start_pos + @start_pos + end + + def _end_pos + @end_pos + end + + def _add_required_child(name, start_pos, end_pos) + @required_children[name] = [start_pos, end_pos] + end + + def _add_optional_child(name, start_pos, end_pos) + @optional_children[name] = [start_pos, end_pos] + end + + def _add_optional_no_child(name) + @optional_children[name] = nil + end + + def _required_keys + @required_children.keys + end + + def _optional_keys + @optional_children.keys + end + + def [](name) + if (range = @required_children[name]) + return Location.new(@buffer, range[0], range[1]) + end + + if @optional_children.key?(name) + range = @optional_children[name] + return range && Location.new(@buffer, range[0], range[1]) + end + + nil + end + end +end diff --git a/lib/rbs/wasm/parser.rb b/lib/rbs/wasm/parser.rb new file mode 100644 index 000000000..c46aedfa0 --- /dev/null +++ b/lib/rbs/wasm/parser.rb @@ -0,0 +1,137 @@ +# frozen_string_literal: true + +require_relative "runtime" +require_relative "deserializer" + +module RBS + # WebAssembly-backed implementation of the parser primitives. + # + # On CRuby these come from the C extension (ext/rbs_extension/main.c). JRuby + # loads this instead: it runs the parser inside WebAssembly, then rebuilds the + # AST with RBS::WASM::Deserializer. rbs/parser_aux.rb layers the public + # RBS::Parser API on top, exactly as it does for the C extension. 
+ class Parser + class << self + def _parse_signature(buffer, start_pos, end_pos) + validate_position_range(start_pos, end_pos) + encoding = buffer.content.encoding.name + success, bytes = WASM::Runtime.instance.parse_signature(buffer.content, encoding, start_pos, end_pos) + raise_parsing_error(buffer, bytes) unless success + + WASM::Deserializer.deserialize(bytes, buffer) + end + + def _parse_type(buffer, start_pos, end_pos, variables, require_eof, void_allowed, self_allowed, classish_allowed) + validate_position_range(start_pos, end_pos) + validate_variables(variables) + encoding = buffer.content.encoding.name + success, bytes = WASM::Runtime.instance.parse_type(buffer.content, encoding, start_pos, end_pos, variables, require_eof, void_allowed, self_allowed, classish_allowed) + raise_parsing_error(buffer, bytes) unless success + + deserialize_or_nil(bytes, buffer) + end + + def _parse_method_type(buffer, start_pos, end_pos, variables, require_eof) + validate_position_range(start_pos, end_pos) + validate_variables(variables) + encoding = buffer.content.encoding.name + success, bytes = WASM::Runtime.instance.parse_method_type(buffer.content, encoding, start_pos, end_pos, variables, require_eof) + raise_parsing_error(buffer, bytes) unless success + + deserialize_or_nil(bytes, buffer) + end + + def _parse_type_params(buffer, start_pos, end_pos, module_type_params) + validate_position_range(start_pos, end_pos) + encoding = buffer.content.encoding.name + success, bytes = WASM::Runtime.instance.parse_type_params(buffer.content, encoding, start_pos, end_pos, module_type_params) + raise_parsing_error(buffer, bytes) unless success + + bytes.empty? ? 
nil : WASM::Deserializer.deserialize_node_list(bytes, buffer) + end + + def _lex(buffer, end_pos) + encoding = buffer.content.encoding.name + _success, bytes = WASM::Runtime.instance.lex(buffer.content, encoding, end_pos) + + WASM::Deserializer.deserialize_tokens(bytes, buffer) + end + + def _parse_inline_leading_annotation(buffer, start_pos, end_pos, variables) + validate_position_range(start_pos, end_pos) + validate_variables(variables) + encoding = buffer.content.encoding.name + success, bytes = WASM::Runtime.instance.parse_inline_leading_annotation(buffer.content, encoding, start_pos, end_pos, variables) + raise_parsing_error(buffer, bytes) unless success + + deserialize_or_nil(bytes, buffer) + end + + def _parse_inline_trailing_annotation(buffer, start_pos, end_pos, variables) + validate_position_range(start_pos, end_pos) + validate_variables(variables) + encoding = buffer.content.encoding.name + success, bytes = WASM::Runtime.instance.parse_inline_trailing_annotation(buffer.content, encoding, start_pos, end_pos, variables) + raise_parsing_error(buffer, bytes) unless success + + deserialize_or_nil(bytes, buffer) + end + + private + + # Reject negative or reversed ranges before handing them to the parser, + # matching validate_position_range in the C extension (main.c). A reversed + # range would otherwise make the lexer loop forever inside WebAssembly. + def validate_position_range(start_pos, end_pos) + if start_pos < 0 || end_pos < 0 + raise ArgumentError, "negative position range: #{start_pos}...#{end_pos}" + end + if start_pos > end_pos + raise ArgumentError, "invalid position range: #{start_pos}...#{end_pos}" + end + end + + # Reject anything that is not nil or an Array of Symbols, matching + # declare_type_variables in the C extension (main.c). + def validate_variables(variables) + return if variables.nil? 
+ + unless variables.is_a?(Array) + raise TypeError, "wrong argument type #{variables.class} (must be an Array of Symbols or nil)" + end + + variables.each do |variable| + unless variable.is_a?(Symbol) + raise TypeError, "Type variables Array contains invalid value #{variable.inspect} of type #{variable.class} (must be an Array of Symbols or nil)" + end + end + end + + # An empty result means the parser reached EOF immediately (`nil`). + def deserialize_or_nil(bytes, buffer) + bytes.empty? ? nil : WASM::Deserializer.deserialize(bytes, buffer) + end + + # Decodes the error blob written by set_error_result (rbs_wasm.c) and raises + # the same error the C extension would (see raise_error in main.c). + def raise_parsing_error(buffer, blob) + start_char, end_char, syntax_error = blob.unpack("l JVM bytecode), which + # runs the parser ~8x faster than the interpreter. Optional: the runtime + # falls back to the interpreter when they are absent. asm* are the ow2 ASM + # libraries the compiler depends on. 
+ OPTIONAL_JARS = %w[compiler asm asm-tree asm-util asm-commons asm-analysis].freeze + + class << self + def instance + @instance ||= new + end + + def wasm_path + ENV["RBS_WASM_PARSER"] || File.expand_path("rbs_parser.wasm", __dir__) + end + + def jars_dir + ENV["RBS_WASM_JARS"] || File.expand_path("jars", __dir__) + end + end + + def initialize + super() + load_jars + @wasm = build_instance + @memory = @wasm.memory + @alloc = @wasm.export("rbs_wasm_alloc") + @free = @wasm.export("rbs_wasm_free") + @result_ptr = @wasm.export("rbs_wasm_result_ptr") + @result_len = @wasm.export("rbs_wasm_result_len") + @parse_signature = @wasm.export("rbs_wasm_parse_signature") + @parse_type = @wasm.export("rbs_wasm_parse_type") + @parse_method_type = @wasm.export("rbs_wasm_parse_method_type") + @parse_type_params = @wasm.export("rbs_wasm_parse_type_params") + @parse_inline_leading_annotation = @wasm.export("rbs_wasm_parse_inline_leading_annotation") + @parse_inline_trailing_annotation = @wasm.export("rbs_wasm_parse_inline_trailing_annotation") + @lex = @wasm.export("rbs_wasm_lex") + end + + # `content` is the whole buffer; `start_pos`/`end_pos` are the character + # range within it to parse. Each method returns [success, bytes]: on success + # `bytes` is the serialized AST, otherwise it is the error blob (see + # set_error_result in rbs_wasm.c). 
+ + def parse_signature(content, encoding, start_pos, end_pos) + run(content, encoding) { |ptr, len, enc_ptr, enc_len| @parse_signature.apply(ptr, len, enc_ptr, enc_len, start_pos, end_pos)[0] } + end + + def parse_type(content, encoding, start_pos, end_pos, variables, require_eof, void_allowed, self_allowed, classish_allowed) + with_variables(variables) do |vars_ptr, vars_len| + run(content, encoding) do |ptr, len, enc_ptr, enc_len| + @parse_type.apply(ptr, len, enc_ptr, enc_len, start_pos, end_pos, vars_ptr, vars_len, bool(require_eof), bool(void_allowed), bool(self_allowed), bool(classish_allowed))[0] + end + end + end + + def parse_method_type(content, encoding, start_pos, end_pos, variables, require_eof) + with_variables(variables) do |vars_ptr, vars_len| + run(content, encoding) do |ptr, len, enc_ptr, enc_len| + @parse_method_type.apply(ptr, len, enc_ptr, enc_len, start_pos, end_pos, vars_ptr, vars_len, bool(require_eof))[0] + end + end + end + + def parse_type_params(content, encoding, start_pos, end_pos, module_type_params) + run(content, encoding) do |ptr, len, enc_ptr, enc_len| + @parse_type_params.apply(ptr, len, enc_ptr, enc_len, start_pos, end_pos, bool(module_type_params))[0] + end + end + + def parse_inline_leading_annotation(content, encoding, start_pos, end_pos, variables) + with_variables(variables) do |vars_ptr, vars_len| + run(content, encoding) do |ptr, len, enc_ptr, enc_len| + @parse_inline_leading_annotation.apply(ptr, len, enc_ptr, enc_len, start_pos, end_pos, vars_ptr, vars_len)[0] + end + end + end + + def parse_inline_trailing_annotation(content, encoding, start_pos, end_pos, variables) + with_variables(variables) do |vars_ptr, vars_len| + run(content, encoding) do |ptr, len, enc_ptr, enc_len| + @parse_inline_trailing_annotation.apply(ptr, len, enc_ptr, enc_len, start_pos, end_pos, vars_ptr, vars_len)[0] + end + end + end + + def lex(content, encoding, end_pos) + run(content, encoding) do |ptr, len, enc_ptr, enc_len| + @lex.apply(ptr, 
len, enc_ptr, enc_len, end_pos)[0] + end + end + + private + + # Copies `source` and its encoding name into linear memory, yields their + # pointers/lengths to the block (which invokes the parser and returns its + # status), then reads the result back out. Serialized through the monitor + # because the module keeps its result in a single shared location. + def run(source, encoding) + synchronize do + bytes = source.b + length = bytes.bytesize + name = encoding.to_s.b + name_length = name.bytesize + source_ptr = @alloc.apply(length)[0] + name_ptr = @alloc.apply(name_length)[0] + begin + @memory.write(source_ptr, bytes.to_java_bytes) + @memory.write(name_ptr, name.to_java_bytes) unless name_length.zero? + status = yield(source_ptr, length, name_ptr, name_length) + [status == 1, read_result] + ensure + @free.apply(source_ptr) + @free.apply(name_ptr) + end + end + end + + def read_result + pointer = @result_ptr.apply[0] + length = @result_len.apply[0] + return "".b if length.zero? + + String.from_java_bytes(@memory.read_bytes(pointer, length)).b + end + + # Allocates a buffer of newline-separated variable names and yields its + # pointer/length. A nil `variables` is passed as length -1 ("no variables"). + def with_variables(variables) + names = variables&.map(&:to_s)&.join("\n") + + if names.nil? || names.empty? + return yield(0, variables.nil? ? -1 : 0) + end + + bytes = names.b + length = bytes.bytesize + synchronize do + pointer = @alloc.apply(length)[0] + begin + @memory.write(pointer, bytes.to_java_bytes) + yield(pointer, length) + ensure + @free.apply(pointer) + end + end + end + + def bool(value) + value ? 
1 : 0 + end + + def build_instance + parser = Java::ComDylibsoChicoryWasm::Parser + instance_class = Java::ComDylibsoChicoryRuntime::Instance + import_values = Java::ComDylibsoChicoryRuntime::ImportValues + wasi_preview1 = Java::ComDylibsoChicoryWasi::WasiPreview1 + wasi_options = Java::ComDylibsoChicoryWasi::WasiOptions + + wasm_module = parser.parse(java.io.File.new(self.class.wasm_path)) + wasi = wasi_preview1.builder.with_options(wasi_options.builder.build).build + imports = import_values.builder.add_function(wasi.to_host_functions).build + + builder = instance_class.builder(wasm_module).with_import_values(imports) + if (factory = machine_factory(wasm_module)) + builder = builder.with_machine_factory(factory) + end + + wasm = builder.build + wasm.export("_initialize").apply + wasm + end + + # Chicory's AOT compiler when its jars are present and usable, otherwise nil + # (the builder then uses the interpreter). NameError covers a missing + # compiler class; LinkageError covers an incompatible/missing ASM (so a bad + # jar set degrades to the interpreter instead of crashing). + def machine_factory(wasm_module) + Java::ComDylibsoChicoryCompiler::MachineFactoryCompiler.compile(wasm_module) + rescue NameError, Java::JavaLang::LinkageError + nil + end + + def load_jars + JARS.each { |name| require jar_path(name) } + OPTIONAL_JARS.each do |name| + path = jar_path(name) + require path if File.exist?(path) + end + end + + def jar_path(name) + File.join(self.class.jars_dir, "#{name}.jar") + end + end + end +end diff --git a/rbs.gemspec b/rbs.gemspec index 05ed56874..dcc0935e2 100644 --- a/rbs.gemspec +++ b/rbs.gemspec @@ -35,7 +35,18 @@ Gem::Specification.new do |spec| ].any? {|r| f.match(r) } end end - spec.extensions = %w{ext/rbs_extension/extconf.rb} + + # JRuby cannot load the MRI C extension. 
The `java` platform gem ships the + # prebuilt WebAssembly parser and the Chicory jars instead (assembled by + # `rake wasm:jruby_setup`), and RBS loads the WebAssembly-backed parser. + if ENV["RBS_PLATFORM"] == "java" || (defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby") + spec.platform = "java" + spec.files += Dir.chdir(File.expand_path('..', __FILE__)) do + Dir.glob("lib/rbs/wasm/rbs_parser.wasm") + Dir.glob("lib/rbs/wasm/jars/*.jar") + end + else + spec.extensions = %w{ext/rbs_extension/extconf.rb} + end if false spec.required_ruby_version = ">= 3.4" diff --git a/sig/wasm/deserializer.rbs b/sig/wasm/deserializer.rbs index 4e04e43c2..33cffbe5c 100644 --- a/sig/wasm/deserializer.rbs +++ b/sig/wasm/deserializer.rbs @@ -17,19 +17,27 @@ module RBS # `[directives, declarations]` to match RBS::Parser._parse_signature. def self.deserialize: (String bytes, Buffer buffer) -> untyped + # Deserialize a bare node list (RBS::Parser._parse_type_params). + def self.deserialize_node_list: (String bytes, Buffer buffer) -> Array[untyped] + + # Deserialize the token stream from rbs_wasm_lex (RBS::Parser._lex). + def self.deserialize_tokens: (String bytes, Buffer buffer) -> Array[[ Symbol, Location ]] + def initialize: (String bytes, Buffer buffer) -> void # Reads the next node and returns the reconstructed Ruby value. 
def read_node: () -> untyped + def read_node_list: () -> Array[untyped] + + def read_tokens: () -> Array[[ Symbol, Location ]] + private def read_struct: (Array[untyped] entry) -> untyped def read_field: (untyped reader) -> untyped - def read_node_list: () -> Array[untyped] - def read_hash: () -> Hash[untyped, untyped] def read_count: () -> Integer diff --git a/src/serialize.c b/src/serialize.c index 77162f074..064c6701b 100644 --- a/src/serialize.c +++ b/src/serialize.c @@ -944,3 +944,15 @@ rbs_string_t rbs_serialize_node(rbs_allocator_t *allocator, rbs_constant_pool_t return rbs_buffer_to_string(&state.buffer); } + +rbs_string_t rbs_serialize_node_list(rbs_allocator_t *allocator, rbs_constant_pool_t *constant_pool, rbs_node_list_t *list) { + rbs_serialize_state state = { + .allocator = allocator, + .constant_pool = constant_pool, + }; + rbs_buffer_init(allocator, &state.buffer); + + w_node_list(&state, list); + + return rbs_buffer_to_string(&state.buffer); +} diff --git a/templates/include/rbs/serialize.h.erb b/templates/include/rbs/serialize.h.erb index c9794f62e..572a5f616 100644 --- a/templates/include/rbs/serialize.h.erb +++ b/templates/include/rbs/serialize.h.erb @@ -23,4 +23,10 @@ */ rbs_string_t rbs_serialize_node(rbs_allocator_t *allocator, rbs_constant_pool_t *constant_pool, rbs_node_t *node); +/** + * Like rbs_serialize_node, but for a bare node list (e.g. the result of + * rbs_parse_type_params). Decoded by RBS::WASM::Deserializer.deserialize_node_list. 
+ */ +rbs_string_t rbs_serialize_node_list(rbs_allocator_t *allocator, rbs_constant_pool_t *constant_pool, rbs_node_list_t *list); + #endif diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index b0c4aceaf..c72d2fecc 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -219,3 +219,15 @@ rbs_string_t rbs_serialize_node(rbs_allocator_t *allocator, rbs_constant_pool_t return rbs_buffer_to_string(&state.buffer); } + +rbs_string_t rbs_serialize_node_list(rbs_allocator_t *allocator, rbs_constant_pool_t *constant_pool, rbs_node_list_t *list) { + rbs_serialize_state state = { + .allocator = allocator, + .constant_pool = constant_pool, + }; + rbs_buffer_init(allocator, &state.buffer); + + w_node_list(&state, list); + + return rbs_buffer_to_string(&state.buffer); +} diff --git a/test/rbs/wasm/jruby_parser_test.rb b/test/rbs/wasm/jruby_parser_test.rb new file mode 100644 index 000000000..c66a3e4e5 --- /dev/null +++ b/test/rbs/wasm/jruby_parser_test.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +require "test/unit" +require "rbs" + +module RBS + module WASM + # Exercises the WebAssembly-backed parser that RBS uses on JRuby. It runs only + # on JRuby (where RBS::Parser is implemented by lib/rbs/wasm), and deliberately + # avoids test_helper so it needs nothing beyond `rbs` itself. + # + # Byte-for-byte equivalence with the C extension is covered on CRuby by + # test/rbs/wasm/serialization_test.rb; here we confirm the same code path works + # end to end through WebAssembly on JRuby. + # + # Nested modules (rather than `class RBS::WASM::JRubyParserTest`) so the file + # also loads on CRuby, where RBS::WASM is otherwise absent and the test omits. + # Test::Unit is fully qualified because RBS::Test exists and would shadow it. 
+ class JRubyParserTest < ::Test::Unit::TestCase + ROOT = File.expand_path("../../..", __dir__) + + def setup + omit "Only runs on the JRuby/WebAssembly parser" unless RUBY_ENGINE == "jruby" + end + + def test_parses_every_bundled_signature + paths = Dir.glob(File.join(ROOT, "{core,stdlib,sig}/**/*.rbs")).sort + assert_operator paths.size, :>, 0, "expected to find bundled RBS files" + + paths.each do |path| + source = File.read(path, encoding: "UTF-8") + _buffer, _directives, declarations = RBS::Parser.parse_signature(source) + assert_not_nil declarations, "failed to parse #{path}" + end + end + + def test_parse_signature_structure + _buffer, _directives, declarations = RBS::Parser.parse_signature(<<~RBS) + class Foo < Bar + attr_reader name: String + def greet: (String name) -> String + end + RBS + + decl = declarations[0] + assert_instance_of RBS::AST::Declarations::Class, decl + assert_equal "Foo", decl.name.to_s + assert_equal "Bar", decl.super_class&.name&.to_s + assert_equal [:name, :greet], decl.members.map { |member| member.respond_to?(:name) ? 
member.name : nil } + assert_equal 1, decl.location.start_line + end + + def test_parse_type + assert_equal "Hash[Symbol, Array[Integer]]", RBS::Parser.parse_type("Hash[Symbol, Array[Integer]]").to_s + assert_equal "^(Integer, ?String) { () -> void } -> bool", RBS::Parser.parse_type("^(Integer, ?String) { () -> void } -> bool").to_s + assert_equal "A | B", RBS::Parser.parse_type("A | B", variables: [:A, :B]).to_s + end + + def test_parse_method_type + assert_equal "[T] (T, ?Integer) { (T) -> void } -> T", RBS::Parser.parse_method_type("[T] (T, ?Integer) { (T) -> void } -> T").to_s + end + + def test_parse_type_params + params = RBS::Parser.parse_type_params("[T < Comparable, unchecked out U]") + assert_equal [:T, :U], params.map(&:name) + assert_equal "Comparable", params[0].upper_bound&.name&.to_s + assert_equal :covariant, params[1].variance + end + + def test_lex + types = RBS::Parser.lex("class Foo\nend\n").value.map(&:type) + assert_include types, :kCLASS + assert_equal :pEOF, types.last + end + + def test_parse_error_raises_parsing_error + error = assert_raises(RBS::ParsingError) do + RBS::Parser.parse_signature("class 123 Broken end") + end + assert_not_nil error.location + assert_equal "tINTEGER", error.token_type + end + end + end +end diff --git a/test/rbs/wasm/serialization_test.rb b/test/rbs/wasm/serialization_test.rb index ac2ab10ae..ccd4b99a0 100644 --- a/test/rbs/wasm/serialization_test.rb +++ b/test/rbs/wasm/serialization_test.rb @@ -13,6 +13,10 @@ # is what gives us confidence that the same format, produced inside WebAssembly, # will rebuild correct objects on JRuby. class RBS::WASM::SerializationTest < Test::Unit::TestCase + # The round-trip is driven by the C extension's `_parse_*_to_bytes`, which only + # exists on CRuby. JRuby's end-to-end coverage lives in jruby_parser_test.rb. + omit_on_jruby! 
"Uses the C extension's _parse_*_to_bytes helpers" + ROOT = File.expand_path("../../..", __dir__) def buffer(source) diff --git a/test/test_helper.rb b/test/test_helper.rb index 9749d615e..56bf68e6b 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -57,6 +57,26 @@ def self.omit_on_truffle_ruby!(reason = "Not supported on TruffleRuby") def omit_on_truffle_ruby!(reason = "Not supported on TruffleRuby") omit(reason) if RUBY_ENGINE == "truffleruby" end + + # Omit *all* test cases defined in this class when running on JRuby. + # + # On JRuby the parser runs in WebAssembly (see lib/rbs/wasm). Use this at the + # class body level for features that depend on the C extension or on APIs JRuby + # does not implement. + def self.omit_on_jruby!(reason = "Not supported on JRuby") + return unless RUBY_ENGINE == "jruby" + + setup { omit(reason) } + end + + # Omit the running test case when running on JRuby. + # + # Use it inside a test method when only a few cases of an otherwise supported + # class fail on JRuby (e.g. those that depend on the C extension or on APIs + # JRuby does not implement). + def omit_on_jruby!(reason = "Not supported on JRuby") + omit(reason) if RUBY_ENGINE == "jruby" + end end module TestHelper diff --git a/wasm/README.md b/wasm/README.md index 1fd81d452..71e513a63 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -5,10 +5,12 @@ on the Ruby C API, so it can be compiled to WebAssembly as-is. This directory holds the small entry-point shim ([`rbs_wasm.c`](rbs_wasm.c)) that exposes a stable ABI to a WebAssembly host. -The motivating use case is running RBS on Ruby implementations that cannot load -the MRI C extension (notably JRuby): the host loads `rbs_parser.wasm`, runs the -parser over a source buffer, and reads the result back out — no native build per -platform required. 
+This is how RBS runs on Ruby implementations that cannot load the MRI C +extension (notably JRuby): the host loads `rbs_parser.wasm`, runs the parser over +a source buffer, and reads the serialized AST back out. The Ruby side then +rebuilds `RBS::AST` objects with `RBS::WASM::Deserializer` — no native build per +platform required. See [`lib/rbs/wasm`](../lib/rbs/wasm) and +[`docs/wasm_serialization.md`](../docs/wasm_serialization.md). ## Building @@ -17,16 +19,9 @@ The build needs the [WASI SDK](https://github.com/WebAssembly/wasi-sdk/releases) ```console $ export WASI_SDK_PATH=/path/to/wasi-sdk -$ rake wasm:build -Built .../wasm/rbs_parser.wasm -``` - -To also run the smoke test you need [wasmtime](https://wasmtime.dev/) (or another -WASI runtime, via the `WASMTIME` environment variable): - -```console -$ rake wasm:check -WebAssembly selftest passed. +$ rake wasm:build # compile rbs_parser.wasm +$ rake wasm:check # also smoke-test it (needs wasmtime) +$ rake wasm:jruby_setup # assemble lib/rbs/wasm/ for JRuby (wasm + Chicory jars) ``` The compiled `rbs_parser.wasm` is a build artifact and is not checked in. @@ -36,14 +31,29 @@ The compiled `rbs_parser.wasm` is a build artifact and is not checked in. The module is built as a "reactor": it has no `main`, and the host calls `_initialize` once before invoking any export. -| Export | Signature | Description | -| -------------------------- | --------------------- | ------------------------------------------------------------------------ | -| `rbs_wasm_alloc` | `(i32) -> i32` | Allocate N bytes in linear memory and return the offset. | -| `rbs_wasm_free` | `(i32) -> ()` | Free a region returned by `rbs_wasm_alloc`. | -| `rbs_wasm_parse_signature` | `(i32 ptr, i32 len) -> i32` | Parse the UTF-8 source at `ptr`/`len`. Returns 0 on success, 1 on error. | -| `rbs_wasm_selftest` | `() -> i32` | Parse a small fixed signature. Returns 0 on success, 1 otherwise. 
| - -This is the foundation step: it proves the parser builds and runs under -WebAssembly. Subsequent steps add a compact serialization of the parsed AST so -the host can reconstruct `RBS::AST` objects, and wire the module into RBS on -JRuby through a JVM WebAssembly runtime. +Memory management and results: + +| Export | Signature | Description | +| --- | --- | --- | +| `rbs_wasm_alloc` | `(i32) -> i32` | Allocate N bytes in linear memory, return the offset. | +| `rbs_wasm_free` | `(i32) -> ()` | Free a region from `rbs_wasm_alloc`. | +| `rbs_wasm_result_ptr` | `() -> i32` | Offset of the most recent result. | +| `rbs_wasm_result_len` | `() -> i32` | Length of the most recent result. | + +Parsing — each takes the whole buffer (`ptr`/`len`), its Ruby encoding name +(`enc`/`enc_len`, e.g. `"UTF-8"` or `"EUC-JP"`; falls back to UTF-8 when empty or +unknown), and the character range to parse (`start`/`end`). Each returns `1` on +success or `0` on a parse error. On success the result is the serialized AST; on +error it is an error blob (start/end positions, syntax flag, token type, +message). Type/method-type parsing also takes a buffer of newline-separated +type-variable names (`vars`/`vars_len`, with `vars_len < 0` meaning "none"): + +| Export | Signature | +| --- | --- | +| `rbs_wasm_parse_signature` | `(ptr, len, enc, enc_len, start, end) -> i32` | +| `rbs_wasm_parse_type` | `(ptr, len, enc, enc_len, start, end, vars, vars_len, require_eof, void_allowed, self_allowed, classish_allowed) -> i32` | +| `rbs_wasm_parse_method_type` | `(ptr, len, enc, enc_len, start, end, vars, vars_len, require_eof) -> i32` | +| `rbs_wasm_selftest` | `() -> i32` (parses a fixed sample; `1` on success) | + +For type and method-type parsing, a successful result of length 0 means the input +was empty (`nil`). 
diff --git a/wasm/rbs_wasm.c b/wasm/rbs_wasm.c index 41a35051f..14697d43f 100644 --- a/wasm/rbs_wasm.c +++ b/wasm/rbs_wasm.c @@ -3,34 +3,50 @@ * * WebAssembly entry points for the RBS parser. * - * The RBS parser in `src/` is plain, self-contained C with no dependency on - * the Ruby C API. This file exposes a small, stable ABI so that the parser can - * be driven from a WebAssembly host (for example, a JVM-based runtime running - * under JRuby). + * The parser in `src/` is plain, self-contained C with no dependency on the + * Ruby C API, so it compiles to WebAssembly as-is. This file exposes a small, + * stable ABI so the parser can be driven from a WebAssembly host (a JVM-based + * runtime running under JRuby). * - * This module is built as a "reactor" (`-mexec-model=reactor`): it has no - * `main`, and the host is expected to call `_initialize` once before invoking - * any of the exported functions below. + * The flow is: the host writes the source string into linear memory + * (`rbs_wasm_alloc`), calls one of the `rbs_wasm_parse_*` functions, and reads + * the result back out (`rbs_wasm_result_ptr` / `rbs_wasm_result_len`). On + * success the result is the serialized AST (see `rbs_serialize_node` and + * `docs/wasm_serialization.md`); on a parse error it is an error blob (see + * `set_error_result`). `RBS::WASM` on the Ruby side decodes both. * - * For now this only proves the toolchain end to end: it can allocate memory in - * the linear address space, run the parser over a source buffer, and report - * whether parsing succeeded. Serializing the resulting AST back to the host is - * handled in a later step. + * Built as a "reactor": no `main`, and the host calls `_initialize` once before + * invoking any export. 
*/ +#include #include #include -#include #include "rbs/parser.h" +#include "rbs/serialize.h" #include "rbs/string.h" +#include "rbs/util/rbs_buffer.h" #include "rbs/util/rbs_encoding.h" +// The result of the most recent parse, living in linear memory until the next +// call replaces it. WebAssembly is little-endian, so the multi-byte integers +// written below match the little-endian format the Ruby decoder expects. +static char *result_buffer = NULL; +static int32_t result_length = 0; + +// Replace the current result with a fresh `length`-byte buffer and return a +// pointer to it for the caller to fill in. +static char *allocate_result(size_t length) { + free(result_buffer); + result_buffer = (char *) malloc(length == 0 ? 1 : length); + result_length = (int32_t) length; + return result_buffer; +} + /** - * Allocate `size` bytes in the module's linear memory and return the offset. - * - * The host uses this to reserve a region it can write an input string into - * before calling one of the parse entry points. + * Allocate `size` bytes in linear memory and return the offset. The host uses + * this to reserve space for an input string before calling a parse function. */ __attribute__((export_name("rbs_wasm_alloc"))) void *rbs_wasm_alloc(size_t size) { return malloc(size); @@ -44,46 +60,352 @@ __attribute__((export_name("rbs_wasm_free"))) void rbs_wasm_free(void *ptr) { } /** - * Parse an RBS signature from a UTF-8 source buffer. + * Offset of the most recent parse result in linear memory. + */ +__attribute__((export_name("rbs_wasm_result_ptr"))) +int32_t +rbs_wasm_result_ptr(void) { + return (int32_t) (intptr_t) result_buffer; +} + +/** + * Length, in bytes, of the most recent parse result. 
+ */ +__attribute__((export_name("rbs_wasm_result_len"))) +int32_t +rbs_wasm_result_len(void) { + return result_length; +} + +// Encode the parser's error into the result buffer: +// +// [i32 start_char][i32 end_char][u8 syntax_error] +// [u32 token_type_len][token_type bytes][u32 message_len][message bytes] +// +// Always returns 0, the failure status for the parse functions. +static int set_error_result(rbs_parser_t *parser) { + rbs_error_t *error = parser->error; + const char *token_type = rbs_token_type_str(error->token.type); + const char *message = error->message; + uint32_t token_type_len = (uint32_t) strlen(token_type); + uint32_t message_len = (uint32_t) strlen(message); + + int32_t start_char = error->token.range.start.char_pos; + int32_t end_char = error->token.range.end.char_pos; + uint8_t syntax_error = error->syntax_error ? 1 : 0; + + size_t total = 4 + 4 + 1 + 4 + token_type_len + 4 + message_len; + char *p = allocate_result(total); + + memcpy(p, &start_char, 4); + p += 4; + memcpy(p, &end_char, 4); + p += 4; + *p++ = (char) syntax_error; + memcpy(p, &token_type_len, 4); + p += 4; + memcpy(p, token_type, token_type_len); + p += token_type_len; + memcpy(p, &message_len, 4); + p += 4; + memcpy(p, message, message_len); + + return 0; +} + +static int set_serialized_result(rbs_parser_t *parser, rbs_node_t *node) { + rbs_string_t bytes = rbs_serialize_node(parser->allocator, &parser->constant_pool, node); + size_t length = rbs_string_len(bytes); + memcpy(allocate_result(length), bytes.start, length); + return 1; +} + +// A reversed or out-of-bounds range would make the lexer loop forever, which +// would hang the whole host. Hosts are expected to validate too (RBS::Parser +// raises on bad ranges), but guard here so a stray caller can never wedge the VM. +static bool range_is_valid(int start_pos, int end_pos, int length) { + return start_pos >= 0 && end_pos >= 0 && start_pos <= end_pos && end_pos <= length; +} + +// Resolve a Ruby encoding name (e.g. 
"UTF-8", "EUC-JP") to an rbs encoding, +// falling back to UTF-8 when none is given or the name is not recognised. +static const rbs_encoding_t *resolve_encoding(const char *name, int name_length) { + if (name_length > 0) { + const rbs_encoding_t *encoding = rbs_encoding_find((const uint8_t *) name, (const uint8_t *) (name + name_length)); + if (encoding != NULL) return encoding; + } + return RBS_ENCODING_UTF_8_ENTRY; +} + +// Declare type variables from a buffer of newline-separated names. A negative +// length means "no variables given" (the parser keeps its default table). +static void declare_variables(rbs_parser_t *parser, const char *variables, int variables_length) { + if (variables_length < 0) return; + + rbs_parser_push_typevar_table(parser, true); + + const char *cursor = variables; + const char *end = variables + variables_length; + const char *name_start = cursor; + + while (cursor <= end) { + if (cursor == end || *cursor == '\n') { + size_t name_length = (size_t) (cursor - name_start); + if (name_length > 0) { + uint8_t *copied = (uint8_t *) malloc(name_length); + memcpy(copied, name_start, name_length); + rbs_constant_id_t id = rbs_constant_pool_insert_owned(&parser->constant_pool, copied, name_length); + (void) rbs_parser_insert_typevar(parser, id); + } + name_start = cursor + 1; + } + cursor++; + } +} + +/** + * Parse an RBS signature from a source buffer. + * + * `source`/`length` is the whole buffer content; `encoding`/`encoding_length` is + * its Ruby encoding name; `start_pos`/`end_pos` are the character range within it + * to parse, so reported locations are absolute (this mirrors + * RBS::Parser._parse_signature). * - * @param source Offset of the source buffer in linear memory. - * @param length Length of the source buffer, in bytes. - * @return 0 if parsing succeeded, 1 if a parse error occurred. + * @return 1 on success (result is the serialized AST), 0 on a parse error + * (result is an error blob). 
*/ -__attribute__((export_name("rbs_wasm_parse_signature"))) int rbs_wasm_parse_signature(const char *source, int length) { +__attribute__((export_name("rbs_wasm_parse_signature"))) int rbs_wasm_parse_signature(const char *source, int length, const char *encoding, int encoding_length, int start_pos, int end_pos) { + if (!range_is_valid(start_pos, end_pos, length)) { + allocate_result(0); + return 0; + } + rbs_string_t string = rbs_string_new(source, source + length); - const rbs_encoding_t *encoding = RBS_ENCODING_UTF_8_ENTRY; - rbs_parser_t *parser = rbs_parser_new(string, encoding, 0, length); + rbs_parser_t *parser = rbs_parser_new(string, resolve_encoding(encoding, encoding_length), start_pos, end_pos); rbs_signature_t *signature = NULL; - bool ok = rbs_parse_signature(parser, &signature); + rbs_parse_signature(parser, &signature); + + int status; + if (parser->error == NULL) { + status = set_serialized_result(parser, (rbs_node_t *) signature); + } else { + status = set_error_result(parser); + } + + rbs_parser_free(parser); + return status; +} + +/** + * Parse a single RBS type. + * + * @param variables Newline-separated type variable names (length < 0 for none). + * @return 1 on success, 0 on a parse error. On success with an empty result + * (`rbs_wasm_result_len` == 0), the input was empty (`nil`). 
+ */ +__attribute__((export_name("rbs_wasm_parse_type"))) int rbs_wasm_parse_type(const char *source, int length, const char *encoding, int encoding_length, int start_pos, int end_pos, const char *variables, int variables_length, int require_eof, int void_allowed, int self_allowed, int classish_allowed) { + if (!range_is_valid(start_pos, end_pos, length)) { + allocate_result(0); + return 0; + } + + rbs_string_t string = rbs_string_new(source, source + length); + rbs_parser_t *parser = rbs_parser_new(string, resolve_encoding(encoding, encoding_length), start_pos, end_pos); + declare_variables(parser, variables, variables_length); + + int status; + if (parser->next_token.type == pEOF) { + allocate_result(0); + status = 1; + } else { + rbs_node_t *type = NULL; + rbs_parse_type(parser, &type, void_allowed != 0, self_allowed != 0, classish_allowed != 0); + + if (parser->error == NULL && require_eof) { + rbs_parser_advance(parser); + if (parser->current_token.type != pEOF) { + rbs_parser_set_error(parser, parser->current_token, true, "expected a token `%s`", rbs_token_type_str(pEOF)); + } + } + + status = parser->error == NULL ? set_serialized_result(parser, type) : set_error_result(parser); + } + + rbs_parser_free(parser); + return status; +} + +/** + * Parse a single RBS method type. + * + * @param variables Newline-separated type variable names (length < 0 for none). + * @return 1 on success, 0 on a parse error. On success with an empty result, + * the input was empty (`nil`). + */ +__attribute__((export_name("rbs_wasm_parse_method_type"))) int rbs_wasm_parse_method_type(const char *source, int length, const char *encoding, int encoding_length, int start_pos, int end_pos, const char *variables, int variables_length, int require_eof) { + if (!range_is_valid(start_pos, end_pos, length)) { + allocate_result(0); + return 0; + } - int result = (ok && parser->error == NULL) ? 
0 : 1; + rbs_string_t string = rbs_string_new(source, source + length); + rbs_parser_t *parser = rbs_parser_new(string, resolve_encoding(encoding, encoding_length), start_pos, end_pos); + declare_variables(parser, variables, variables_length); + + int status; + if (parser->next_token.type == pEOF) { + allocate_result(0); + status = 1; + } else { + rbs_method_type_t *method_type = NULL; + rbs_parse_method_type(parser, &method_type, require_eof != 0, true); + + status = parser->error == NULL ? set_serialized_result(parser, (rbs_node_t *) method_type) : set_error_result(parser); + } rbs_parser_free(parser); + return status; +} + +/** + * Parse a type parameter list (e.g. `[T < Comparable]`). On success the result + * is a serialized node list; an empty result means the input was empty (`nil`). + */ +__attribute__((export_name("rbs_wasm_parse_type_params"))) int rbs_wasm_parse_type_params(const char *source, int length, const char *encoding, int encoding_length, int start_pos, int end_pos, int module_type_params) { + if (!range_is_valid(start_pos, end_pos, length)) { + allocate_result(0); + return 0; + } + + rbs_string_t string = rbs_string_new(source, source + length); + rbs_parser_t *parser = rbs_parser_new(string, resolve_encoding(encoding, encoding_length), start_pos, end_pos); - return result; + int status; + if (parser->next_token.type == pEOF) { + allocate_result(0); + status = 1; + } else { + rbs_node_list_t *params = NULL; + rbs_parse_type_params(parser, module_type_params != 0, &params); + + if (parser->error == NULL) { + rbs_string_t bytes = rbs_serialize_node_list(parser->allocator, &parser->constant_pool, params); + size_t n = rbs_string_len(bytes); + memcpy(allocate_result(n), bytes.start, n); + status = 1; + } else { + status = set_error_result(parser); + } + } + + rbs_parser_free(parser); + return status; +} + +// Shared body for the leading/trailing inline annotation parsers. 
+static int parse_inline_annotation(const char *source, int length, const char *encoding, int encoding_length, int start_pos, int end_pos, const char *variables, int variables_length, bool leading) { + if (!range_is_valid(start_pos, end_pos, length)) { + allocate_result(0); + return 0; + } + + rbs_string_t string = rbs_string_new(source, source + length); + rbs_parser_t *parser = rbs_parser_new(string, resolve_encoding(encoding, encoding_length), start_pos, end_pos); + declare_variables(parser, variables, variables_length); + + rbs_ast_ruby_annotations_t *annotation = NULL; + bool success = leading ? rbs_parse_inline_leading_annotation(parser, &annotation) : rbs_parse_inline_trailing_annotation(parser, &annotation); + + int status; + if (parser->error != NULL) { + status = set_error_result(parser); + } else if (!success || annotation == NULL) { + allocate_result(0); + status = 1; + } else { + status = set_serialized_result(parser, (rbs_node_t *) annotation); + } + + rbs_parser_free(parser); + return status; +} + +/** + * Parse an inline leading annotation. On success the result is a serialized + * node; an empty result means there was no annotation (`nil`). + */ +__attribute__((export_name("rbs_wasm_parse_inline_leading_annotation"))) int rbs_wasm_parse_inline_leading_annotation(const char *source, int length, const char *encoding, int encoding_length, int start_pos, int end_pos, const char *variables, int variables_length) { + return parse_inline_annotation(source, length, encoding, encoding_length, start_pos, end_pos, variables, variables_length, true); +} + +/** + * Parse an inline trailing annotation. See rbs_wasm_parse_inline_leading_annotation. 
+ */ +__attribute__((export_name("rbs_wasm_parse_inline_trailing_annotation"))) int rbs_wasm_parse_inline_trailing_annotation(const char *source, int length, const char *encoding, int encoding_length, int start_pos, int end_pos, const char *variables, int variables_length) { + return parse_inline_annotation(source, length, encoding, encoding_length, start_pos, end_pos, variables, variables_length, false); +} + +static void w_lex_u32(rbs_allocator_t *allocator, rbs_buffer_t *buffer, uint32_t value) { + unsigned char bytes[4] = { + (unsigned char) (value & 0xff), + (unsigned char) ((value >> 8) & 0xff), + (unsigned char) ((value >> 16) & 0xff), + (unsigned char) ((value >> 24) & 0xff), + }; + rbs_buffer_append_string(allocator, buffer, (const char *) bytes, 4); } /** - * Parse a small, fixed RBS document. + * Lex the source into tokens. The result is a sequence of records, with no + * leading count (the host reads until the buffer is exhausted): * - * This exercises the whole parser path inside WebAssembly without the host - * having to write anything into linear memory, which makes it convenient as a - * build smoke test (`wasmtime run --invoke rbs_wasm_selftest rbs_parser.wasm`). + * [u32 type_name_len][type_name bytes][i32 start_char][i32 end_char] + * + * The final token is always pEOF, mirroring RBS::Parser._lex. + * + * @return 1 always (lexing does not report parse errors here). 
+ */ +__attribute__((export_name("rbs_wasm_lex"))) int rbs_wasm_lex(const char *source, int length, const char *encoding, int encoding_length, int end_pos) { + // Clamp the caller-supplied end position into the buffer. A negative or + // past-the-end value would otherwise run the lexer outside `source`, the + // same hazard range_is_valid guards against for the parse functions. + // Clamping (rather than failing) keeps this export's contract: it still + // returns 1 and the token stream still ends with pEOF. + if (end_pos > length) end_pos = length; + if (end_pos < 0) end_pos = 0; + + rbs_allocator_t *allocator = rbs_allocator_init(); + rbs_lexer_t *lexer = rbs_lexer_new(allocator, rbs_string_new(source, source + length), resolve_encoding(encoding, encoding_length), 0, end_pos); + + rbs_buffer_t buffer; + rbs_buffer_init(allocator, &buffer); + + rbs_token_t token = NullToken; + while (token.type != pEOF) { + token = rbs_lexer_next_token(lexer); + + const char *type_name = rbs_token_type_str(token.type); + uint32_t type_name_length = (uint32_t) strlen(type_name); + w_lex_u32(allocator, &buffer, type_name_length); + rbs_buffer_append_string(allocator, &buffer, type_name, type_name_length); + w_lex_u32(allocator, &buffer, (uint32_t) token.range.start.char_pos); + w_lex_u32(allocator, &buffer, (uint32_t) token.range.end.char_pos); + } + + rbs_string_t bytes = rbs_buffer_to_string(&buffer); + size_t n = rbs_string_len(bytes); + memcpy(allocate_result(n), bytes.start, n); + + rbs_allocator_free(allocator); + return 1; +} + +/** + * Parse a small, fixed RBS document, used as a build smoke test + * (`wasmtime run --invoke rbs_wasm_selftest rbs_parser.wasm`). * - * @return 0 if the sample parsed successfully, 1 otherwise. + * @return 1 if the sample parsed successfully, 0 otherwise. */ __attribute__((export_name("rbs_wasm_selftest"))) int rbs_wasm_selftest(void) { static const char source[] = "class User\n" " attr_reader name: String\n" " def initialize: (String name) -> void\n" - "end\n" - "\n" - "module Authentication\n" - " def authenticate: (String, String) -> bool\n" "end\n"; - return rbs_wasm_parse_signature(source, (int) (sizeof(source) - 1)); + int length = (int) (sizeof(source) - 1); + return rbs_wasm_parse_signature(source, length, "UTF-8", 5, 0, length); }