From c731ac54f3d854d751e3275c55a192f846451219 Mon Sep 17 00:00:00 2001
From: ydah
Date: Sat, 22 Feb 2025 02:21:41 +0900
Subject: [PATCH] Support Token Categories

I considered introducing **Token Categories** into Lrama, inspired by Chevrotain's Token Categories.
This mechanism allows multiple token types to be logically grouped together so they can be matched collectively in grammar rules.

Reference: [Chevrotain Token Categories](https://chevrotain.io/docs/features/token_categories.html)

For example, in `parse.y`, we currently define grammar rules like this:

```yacc
p_cases
  : opt_else
  | p_case_body
  ;
```

This rule matches two token types: `opt_else` and `p_case_body`.
If we introduce a **Token Category** named `p_cases` to group these token types, we could define the rule more concisely:

```yacc
%token-categories p_cases: opt_else p_case_body
```

By grouping multiple token types into a single category, grammar rules only need to consume the category, simplifying rule definitions.

In Chevrotain, tokens can belong to multiple categories by specifying categories in their definitions.
However, this approach makes it difficult to see which tokens belong to a given category at a glance.
To address this, Lrama explicitly lists token members in the category definition.
--- lib/lrama.rb | 1 + lib/lrama/category.rb | 27 + lib/lrama/grammar.rb | 23 + lib/lrama/lexer.rb | 2 +- lib/lrama/parser.rb | 887 ++++++++++++++++--------------- parser.y | 6 + sig/generated/lrama/category.rbs | 19 + sig/lrama/grammar.rbs | 6 + spec/fixtures/categories/basic.y | 41 ++ spec/lrama/parser_spec.rb | 81 +++ 10 files changed, 659 insertions(+), 434 deletions(-) create mode 100644 lib/lrama/category.rb create mode 100644 sig/generated/lrama/category.rbs create mode 100644 spec/fixtures/categories/basic.y diff --git a/lib/lrama.rb b/lib/lrama.rb index 56ba0044..f75effff 100644 --- a/lib/lrama.rb +++ b/lib/lrama.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "lrama/bitmap" +require_relative "lrama/category" require_relative "lrama/command" require_relative "lrama/context" require_relative "lrama/counterexamples" diff --git a/lib/lrama/category.rb b/lib/lrama/category.rb new file mode 100644 index 00000000..06fb6e5b --- /dev/null +++ b/lib/lrama/category.rb @@ -0,0 +1,27 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Category + attr_reader :id #: Lrama::Lexer::Token::Ident + attr_reader :name #: String + attr_reader :tokens #: Array[Lrama::Lexer::Token::Ident] + attr_reader :tag #: Lexer::Token::Tag + + # @rbs (id: Lrama::Lexer::Token::Ident) -> void + def initialize(id:) + @id = id + @name = id.s_value + @tokens = [] + @tag = nil + end + + # @rbs (Array[Lrama::Lexer::Token::Ident] tokens, Lexer::Token::Tag tag) -> void + def add_tokens(tokens, tag) + @tag = tag + tokens.each do |token| + @tokens << Lrama::Lexer::Token::Ident.new(s_value: token.s_value, location: token.location) + end + end + end +end diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index 88fd057c..f30dfa91 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -60,6 +60,7 @@ def initialize(rule_counter, define = {}) @locations = false @define = define @required = false + @categories = {} append_special_symbols end @@ 
-113,6 +114,14 @@ def set_union(code, lineno) @union = Union.new(code: code, lineno: lineno) end + def add_category(id:, tokens:, tag:) + unless category = find_category(id.s_value) + category = Lrama::Category.new(id: id) + @categories[id.s_value] = category + end + category.add_tokens(tokens, tag) + end + def add_rule_builder(builder) @rule_builders << builder end @@ -182,6 +191,10 @@ def unique_rule_s_values @rules.map {|rule| rule.lhs.id.s_value }.uniq end + def find_category(name) + @categories[name] + end + def ielr_defined? @define.key?('lr.type') && @define['lr.type'] == 'ielr' end @@ -335,9 +348,19 @@ def normalize_rules end end + expand_categories + @rules.sort_by!(&:id) end + def expand_categories + @categories.values.each do |category| + lhs = Lrama::Lexer::Token::Ident.new(s_value: category.name, location: category.id.location) + add_nterm(id: lhs, tag: category.tag) + @rules << Rule.new(id: @rule_counter.increment, _lhs: lhs, _rhs: category.tokens, lhs_tag: category.tag, token_code: nil, lineno: lhs.line) + end + end + # Collect symbols from rules def collect_symbols @rules.flat_map(&:_rhs).each do |s| diff --git a/lib/lrama/lexer.rb b/lib/lrama/lexer.rb index 5c9ce9f1..67aff7de 100644 --- a/lib/lrama/lexer.rb +++ b/lib/lrama/lexer.rb @@ -14,6 +14,7 @@ class Lexer SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze PERCENT_TOKENS = %w( %union + %token-categories %token %type %nterm @@ -42,7 +43,6 @@ class Lexer %no-stdlib %inline %locations - %categories ).freeze def initialize(grammar_file) diff --git a/lib/lrama/parser.rb b/lib/lrama/parser.rb index 54c7e623..cb1dc763 100644 --- a/lib/lrama/parser.rb +++ b/lib/lrama/parser.rb @@ -654,7 +654,7 @@ def token_to_str(t) module Lrama class Parser < Racc::Parser -module_eval(<<'...end parser.y/module_eval...', 'parser.y', 428) +module_eval(<<'...end parser.y/module_eval...', 'parser.y', 434) include Lrama::Tracer::Duration @@ -729,306 +729,314 @@ def 
raise_parse_error(error_message, location) ##### State transition tables begin ### racc_action_table = [ - 89, 49, 90, 167, 49, 101, 173, 49, 101, 167, - 49, 101, 173, 6, 101, 80, 49, 49, 48, 48, - 41, 76, 76, 49, 49, 48, 48, 42, 76, 76, - 49, 49, 48, 48, 101, 96, 113, 49, 87, 48, - 150, 101, 96, 151, 45, 171, 169, 170, 151, 176, - 170, 91, 169, 170, 81, 176, 170, 20, 24, 25, - 26, 27, 28, 29, 30, 31, 87, 32, 33, 34, - 35, 36, 37, 38, 39, 49, 4, 48, 5, 101, - 96, 181, 182, 183, 128, 20, 24, 25, 26, 27, - 28, 29, 30, 31, 46, 32, 33, 34, 35, 36, - 37, 38, 39, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 53, 20, 24, 25, 26, 27, 28, 29, - 30, 31, 53, 32, 33, 34, 35, 36, 37, 38, - 39, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 44, 20, 24, 25, 26, 27, 28, 29, 30, 31, - 53, 32, 33, 34, 35, 36, 37, 38, 39, 49, - 4, 48, 5, 101, 96, 49, 49, 48, 48, 101, - 101, 49, 49, 48, 48, 101, 101, 49, 49, 48, - 197, 101, 101, 49, 49, 197, 48, 101, 101, 49, - 49, 197, 48, 101, 181, 182, 183, 128, 204, 210, - 217, 205, 205, 205, 49, 49, 48, 48, 49, 49, - 48, 48, 49, 49, 48, 48, 181, 182, 183, 116, - 117, 56, 53, 53, 53, 53, 53, 62, 63, 64, - 65, 66, 68, 68, 68, 82, 53, 53, 104, 108, - 108, 115, 122, 123, 125, 128, 129, 133, 139, 140, - 141, 142, 144, 145, 101, 154, 139, 157, 154, 161, - 162, 68, 164, 165, 172, 177, 154, 184, 128, 188, - 154, 190, 128, 154, 199, 154, 128, 68, 165, 206, - 165, 68, 68, 215, 128, 68 ] + 92, 50, 93, 173, 50, 104, 179, 50, 104, 173, + 50, 104, 179, 6, 104, 83, 50, 50, 49, 49, + 42, 77, 77, 50, 50, 49, 49, 43, 77, 77, + 50, 4, 49, 5, 104, 99, 50, 46, 49, 90, + 104, 99, 47, 4, 50, 5, 49, 175, 176, 116, + 182, 176, 94, 175, 176, 84, 182, 176, 20, 24, + 25, 26, 27, 28, 29, 30, 31, 90, 32, 33, + 34, 35, 36, 37, 38, 39, 50, 40, 49, 54, + 104, 99, 187, 188, 189, 132, 20, 24, 25, 26, + 27, 28, 29, 30, 31, 54, 32, 33, 34, 35, + 36, 37, 38, 39, 54, 40, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 57, 20, 24, 25, 26, + 27, 28, 29, 30, 31, 54, 32, 33, 34, 35, + 36, 37, 38, 39, 54, 40, 11, 
12, 13, 14, + 15, 16, 17, 18, 19, 45, 20, 24, 25, 26, + 27, 28, 29, 30, 31, 54, 32, 33, 34, 35, + 36, 37, 38, 39, 50, 40, 49, 54, 104, 99, + 50, 50, 49, 49, 104, 104, 50, 50, 49, 49, + 104, 104, 50, 50, 49, 203, 104, 104, 50, 50, + 203, 49, 104, 104, 50, 50, 203, 49, 104, 187, + 188, 189, 132, 50, 50, 49, 49, 50, 50, 49, + 49, 50, 50, 49, 49, 50, 156, 49, 157, 177, + 210, 157, 211, 187, 188, 189, 216, 223, 211, 211, + 120, 121, 54, 63, 64, 65, 66, 67, 69, 69, + 69, 69, 85, 54, 54, 107, 111, 111, 118, 119, + 126, 127, 129, 132, 133, 137, 142, 144, 145, 146, + 147, 149, 150, 104, 160, 144, 163, 160, 167, 168, + 69, 170, 171, 178, 183, 160, 190, 132, 194, 160, + 196, 132, 160, 205, 160, 132, 69, 171, 212, 171, + 69, 69, 221, 132, 69 ] racc_action_check = [ - 47, 153, 47, 153, 159, 153, 159, 178, 159, 178, - 189, 178, 189, 1, 189, 39, 35, 36, 35, 36, + 48, 159, 48, 159, 165, 159, 165, 184, 165, 184, + 195, 184, 195, 1, 195, 40, 35, 36, 35, 36, 5, 35, 36, 37, 38, 37, 38, 6, 37, 38, - 59, 74, 59, 74, 59, 59, 74, 60, 45, 60, - 138, 60, 60, 138, 9, 156, 153, 153, 156, 159, - 159, 47, 178, 178, 39, 189, 189, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 83, 45, 45, 45, - 45, 45, 45, 45, 45, 61, 0, 61, 0, 61, - 61, 166, 166, 166, 166, 83, 83, 83, 83, 83, - 83, 83, 83, 83, 11, 83, 83, 83, 83, 83, - 83, 83, 83, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 13, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 14, 3, 3, 3, 3, 3, 3, 3, - 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 60, 0, 60, 0, 60, 60, 61, 9, 61, 46, + 61, 61, 11, 2, 75, 2, 75, 159, 159, 75, + 165, 165, 48, 184, 184, 40, 195, 195, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 86, 46, 46, + 46, 46, 46, 46, 46, 46, 62, 46, 62, 13, + 62, 62, 172, 172, 172, 172, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 14, 86, 86, 86, 86, + 86, 86, 86, 86, 15, 86, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 16, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 17, 3, 3, 3, 3, + 3, 3, 3, 3, 20, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 15, 8, 8, 8, 8, 8, 8, 8, 8, 97, - 2, 97, 2, 97, 97, 71, 108, 71, 108, 71, - 108, 109, 169, 109, 
169, 109, 169, 176, 184, 176, - 184, 176, 184, 190, 205, 190, 205, 190, 205, 206, - 12, 206, 12, 206, 174, 174, 174, 174, 196, 201, - 214, 196, 201, 214, 69, 76, 69, 76, 104, 105, - 104, 105, 111, 113, 111, 113, 198, 198, 198, 81, - 81, 16, 17, 20, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 40, 51, 56, 67, 70, - 72, 80, 84, 85, 86, 87, 93, 107, 115, 116, - 117, 118, 127, 128, 134, 140, 141, 143, 144, 145, - 146, 150, 151, 152, 158, 163, 165, 167, 168, 171, - 172, 173, 175, 177, 187, 188, 192, 193, 195, 197, - 200, 202, 204, 209, 210, 216 ] + 8, 8, 8, 8, 8, 24, 8, 8, 8, 8, + 8, 8, 8, 8, 100, 8, 100, 25, 100, 100, + 72, 111, 72, 111, 72, 111, 112, 175, 112, 175, + 112, 175, 182, 190, 182, 190, 182, 190, 196, 211, + 196, 211, 196, 211, 212, 12, 212, 12, 212, 180, + 180, 180, 180, 70, 77, 70, 77, 107, 108, 107, + 108, 114, 116, 114, 116, 142, 143, 142, 143, 162, + 202, 162, 202, 204, 204, 204, 207, 220, 207, 220, + 84, 84, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 39, 41, 52, 57, 68, 71, 73, 82, 83, + 87, 88, 89, 90, 96, 110, 118, 119, 120, 121, + 122, 131, 132, 138, 145, 146, 148, 149, 150, 151, + 156, 157, 158, 164, 169, 171, 173, 174, 177, 178, + 179, 181, 183, 193, 194, 198, 199, 201, 203, 206, + 208, 210, 215, 216, 222 ] racc_action_pointer = [ - 66, 13, 150, 90, nil, 13, 27, nil, 118, 35, - nil, 88, 187, 63, 73, 101, 216, 173, nil, nil, - 174, nil, nil, nil, 175, 176, 177, 222, 223, 224, - 225, 226, 224, 225, 226, 13, 14, 20, 21, 10, - 233, nil, nil, nil, nil, 34, nil, -5, nil, nil, - nil, 187, nil, nil, nil, nil, 188, nil, nil, 27, - 34, 72, nil, nil, nil, nil, nil, 230, nil, 201, - 231, 162, 232, nil, 28, nil, 202, nil, nil, nil, - 200, 215, nil, 62, 233, 221, 222, 191, nil, nil, - nil, nil, nil, 244, nil, nil, nil, 156, nil, nil, - nil, nil, nil, nil, 205, 206, nil, 241, 163, 168, - nil, 209, nil, 210, nil, 243, 206, 209, 240, nil, - nil, nil, nil, nil, nil, nil, nil, 209, 248, nil, - nil, nil, nil, nil, 247, nil, nil, nil, -2, nil, - 208, 251, nil, 255, 211, 
204, 210, nil, nil, nil, - 253, 257, 217, -2, nil, nil, 3, nil, 218, 1, - nil, nil, nil, 222, nil, 219, 30, 226, 214, 169, - nil, 226, 223, 230, 143, 218, 174, 226, 4, nil, - nil, nil, nil, nil, 175, nil, nil, 272, 228, 7, - 180, nil, 222, 269, nil, 232, 156, 238, 165, nil, - 234, 157, 273, nil, 274, 181, 186, nil, nil, 233, - 230, nil, nil, nil, 158, nil, 277, nil, nil ] + 21, 13, 33, 93, nil, 13, 27, nil, 123, 28, + nil, 36, 192, 29, 45, 54, 110, 75, nil, nil, + 84, nil, nil, nil, 105, 117, 182, 228, 229, 230, + 231, 232, 230, 231, 232, 13, 14, 20, 21, 233, + 10, 240, nil, nil, nil, nil, 35, nil, -5, nil, + nil, nil, 193, nil, nil, nil, nil, 194, nil, nil, + 27, 33, 73, nil, nil, nil, nil, nil, 237, nil, + 200, 238, 167, 239, nil, 41, nil, 201, nil, nil, + nil, nil, 244, 206, 226, nil, 63, 241, 229, 230, + 198, nil, nil, nil, nil, nil, 252, nil, nil, nil, + 161, nil, nil, nil, nil, nil, nil, 204, 205, nil, + 249, 168, 173, nil, 208, nil, 209, nil, 215, 252, + 217, 216, 249, nil, nil, nil, nil, nil, nil, nil, + nil, 220, 257, nil, nil, nil, nil, nil, 256, nil, + nil, nil, 212, 172, nil, 216, 260, nil, 264, 219, + 212, 218, nil, nil, nil, nil, 262, 266, 225, -2, + nil, nil, 175, nil, 226, 1, nil, nil, nil, 233, + nil, 227, 30, 233, 222, 174, nil, 237, 231, 237, + 147, 226, 179, 234, 4, nil, nil, nil, nil, nil, + 180, nil, nil, 281, 236, 7, 185, nil, 230, 278, + nil, 240, 176, 245, 171, nil, 242, 182, 282, nil, + 283, 186, 191, nil, nil, 241, 238, nil, nil, nil, + 183, nil, 286, nil, nil ] racc_action_default = [ - -1, -128, -1, -3, -10, -128, -128, -2, -3, -128, - -16, -128, -128, -128, -128, -128, -128, -128, -24, -25, - -128, -32, -33, -34, -128, -128, -128, -128, -128, -128, - -128, -128, -50, -50, -50, -128, -128, -128, -128, -128, - -128, -13, 219, -4, -26, -128, -17, -123, -93, -94, - -122, -14, -19, -85, -20, -21, -128, -23, -31, -128, - -128, -128, -38, -39, -40, -41, -42, -43, -51, -128, - -44, -128, -45, -46, -88, -90, -128, -47, -48, -49, - -128, -128, 
-11, -5, -7, -95, -128, -68, -18, -124, - -125, -126, -15, -128, -22, -27, -28, -29, -35, -83, - -84, -127, -36, -37, -128, -52, -54, -56, -128, -79, - -81, -88, -89, -128, -91, -128, -128, -128, -128, -6, - -8, -9, -120, -96, -97, -98, -69, -128, -128, -86, - -30, -55, -53, -57, -76, -82, -80, -92, -128, -62, - -66, -128, -12, -128, -66, -128, -128, -58, -77, -78, - -50, -128, -60, -64, -67, -70, -128, -121, -99, -100, - -102, -119, -87, -128, -63, -66, -68, -93, -68, -128, - -116, -128, -66, -93, -68, -68, -128, -66, -65, -71, - -72, -108, -109, -110, -128, -74, -75, -128, -66, -101, - -128, -103, -68, -50, -107, -59, -128, -93, -111, -117, - -61, -128, -50, -106, -50, -128, -128, -112, -113, -128, - -68, -104, -73, -114, -128, -118, -50, -115, -105 ] + -1, -130, -1, -3, -10, -130, -130, -2, -3, -130, + -16, -130, -130, -130, -130, -130, -130, -130, -24, -25, + -130, -32, -33, -34, -130, -130, -130, -130, -130, -130, + -130, -130, -51, -51, -51, -130, -130, -130, -130, -51, + -130, -130, -13, 225, -4, -26, -130, -17, -125, -95, + -96, -124, -14, -19, -89, -20, -21, -130, -23, -31, + -130, -130, -130, -38, -39, -40, -41, -42, -43, -52, + -130, -44, -130, -45, -46, -60, -92, -130, -47, -48, + -49, -50, -130, -130, -130, -11, -5, -7, -97, -130, + -72, -18, -126, -127, -128, -15, -130, -22, -27, -28, + -29, -35, -87, -88, -129, -36, -37, -130, -53, -55, + -57, -130, -83, -85, -60, -61, -130, -93, -130, -130, + -130, -130, -130, -6, -8, -9, -122, -98, -99, -100, + -73, -130, -130, -90, -30, -56, -54, -58, -80, -86, + -84, -94, -130, -130, -66, -70, -130, -12, -130, -70, + -130, -130, -59, -81, -82, -62, -51, -130, -64, -68, + -71, -74, -130, -123, -101, -102, -104, -121, -91, -130, + -67, -70, -72, -95, -72, -130, -118, -130, -70, -95, + -72, -72, -130, -70, -69, -75, -76, -110, -111, -112, + -130, -78, -79, -130, -70, -103, -130, -105, -72, -51, + -109, -63, -130, -95, -113, -119, -65, -130, -51, -108, + -51, -130, -130, -114, -115, -130, -72, -106, -77, -116, + 
-130, -120, -51, -117, -107 ] racc_goto_table = [ - 69, 109, 50, 152, 57, 127, 84, 58, 112, 160, - 114, 59, 60, 61, 86, 52, 54, 55, 98, 102, - 103, 159, 106, 110, 175, 74, 74, 74, 74, 138, - 9, 1, 3, 180, 7, 43, 120, 160, 109, 109, - 195, 192, 121, 94, 119, 112, 40, 137, 118, 189, - 47, 200, 86, 92, 175, 156, 130, 131, 132, 107, - 135, 136, 88, 196, 111, 207, 111, 70, 72, 201, - 73, 77, 78, 79, 67, 147, 134, 178, 148, 149, - 93, 146, 124, 166, 179, 214, 185, 158, 208, 174, - 187, 209, 191, 193, 107, 107, 143, nil, nil, 186, - nil, 111, nil, 111, nil, nil, 194, nil, 166, nil, - 202, nil, nil, nil, 198, nil, nil, nil, 163, 174, - 198, nil, nil, nil, nil, nil, nil, nil, 216, nil, - nil, nil, nil, nil, nil, 213, 198, nil, nil, nil, + 70, 131, 112, 181, 51, 158, 115, 82, 117, 87, + 89, 58, 143, 113, 59, 101, 105, 106, 60, 61, + 62, 53, 55, 56, 186, 1, 109, 75, 75, 75, + 75, 124, 198, 181, 125, 9, 166, 165, 41, 162, + 44, 112, 112, 201, 3, 115, 7, 141, 122, 123, + 89, 97, 139, 140, 206, 134, 213, 71, 73, 48, + 95, 91, 110, 135, 136, 166, 195, 114, 202, 114, + 68, 81, 152, 155, 207, 74, 78, 79, 80, 138, + 184, 153, 154, 185, 96, 191, 151, 128, 164, 172, + 220, 197, 199, 214, 193, 180, 215, 148, nil, 110, + 110, nil, nil, nil, nil, 192, 114, nil, 114, 208, + nil, nil, 200, nil, 172, nil, nil, nil, nil, nil, + 204, nil, nil, nil, 169, 180, 204, 222, nil, nil, + nil, nil, nil, nil, 114, nil, nil, nil, nil, nil, + nil, 219, 204, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, 209, nil, nil, + nil, nil, nil, nil, nil, nil, 217, nil, 218, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 203, nil, nil, nil, nil, nil, nil, nil, nil, - 211, nil, 212, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, 218 ] + 224 ] racc_goto_check = [ - 27, 20, 29, 33, 15, 40, 8, 15, 46, 39, - 46, 15, 15, 15, 12, 16, 16, 16, 22, 22, - 22, 50, 28, 43, 38, 29, 29, 29, 29, 32, - 7, 1, 6, 36, 6, 7, 5, 39, 20, 
20, - 33, 36, 9, 15, 8, 46, 10, 46, 11, 50, - 13, 33, 12, 16, 38, 32, 22, 28, 28, 29, - 43, 43, 14, 37, 29, 36, 29, 24, 24, 37, - 25, 25, 25, 25, 23, 30, 31, 34, 41, 42, - 44, 45, 48, 20, 40, 37, 40, 49, 51, 20, - 52, 53, 40, 40, 29, 29, 54, nil, nil, 20, - nil, 29, nil, 29, nil, nil, 20, nil, 20, nil, - 40, nil, nil, nil, 20, nil, nil, nil, 27, 20, - 20, nil, nil, nil, nil, nil, nil, nil, 40, nil, - nil, nil, nil, nil, nil, 20, 20, nil, nil, nil, + 28, 42, 20, 40, 30, 35, 33, 28, 33, 8, + 12, 15, 34, 45, 15, 22, 22, 22, 15, 15, + 15, 16, 16, 16, 38, 1, 29, 30, 30, 30, + 30, 5, 38, 40, 9, 7, 41, 51, 10, 34, + 7, 20, 20, 35, 6, 33, 6, 33, 11, 8, + 12, 15, 45, 45, 35, 22, 38, 24, 24, 13, + 16, 14, 30, 29, 29, 41, 51, 30, 39, 30, + 23, 26, 31, 33, 39, 25, 25, 25, 25, 32, + 36, 43, 44, 42, 46, 42, 47, 49, 50, 20, + 39, 42, 42, 52, 53, 20, 54, 55, nil, 30, + 30, nil, nil, nil, nil, 20, 30, nil, 30, 42, + nil, nil, 20, nil, 20, nil, nil, nil, nil, nil, + 20, nil, nil, nil, 28, 20, 20, 42, nil, nil, + nil, nil, nil, nil, 30, nil, nil, nil, nil, nil, + nil, 20, 20, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, 28, nil, nil, + nil, nil, nil, nil, nil, nil, 28, nil, 28, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 27, nil, nil, nil, nil, nil, nil, nil, nil, - 27, nil, 27, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, 27 ] + 28 ] racc_goto_pointer = [ - nil, 31, nil, nil, nil, -48, 32, 27, -39, -42, - 42, -34, -31, 38, 15, -13, 2, nil, nil, nil, - -70, nil, -41, 42, 34, 35, nil, -32, -47, -10, - -59, -31, -86, -137, -88, nil, -133, -121, -135, -135, - -82, -56, -55, -48, 27, -48, -66, nil, -3, -57, - -123, -110, -80, -108, -26 ] + nil, 25, nil, nil, nil, -56, 44, 32, -37, -53, + 34, -37, -36, 47, 13, -6, 8, nil, nil, nil, + -70, nil, -45, 38, 24, 40, 32, nil, -32, -44, + -8, -66, -31, -69, -107, -140, -91, nil, -148, -122, + -162, -113, -89, -57, -56, -59, 30, -47, nil, -1, 
+ -61, -112, -111, -82, -109, -29 ] racc_goto_default = [ - nil, nil, 2, 8, 83, nil, nil, nil, nil, nil, - nil, nil, 10, nil, nil, 51, nil, 21, 22, 23, - 95, 97, nil, nil, nil, nil, 105, 71, nil, 99, - nil, nil, nil, nil, 153, 126, nil, nil, 168, 155, - nil, 100, nil, nil, nil, nil, 75, 85, nil, nil, - nil, nil, nil, nil, nil ] + nil, nil, 2, 8, 86, nil, nil, nil, nil, nil, + nil, nil, 10, nil, nil, 52, nil, 21, 22, 23, + 98, 100, nil, nil, nil, nil, nil, 108, 72, nil, + 102, nil, nil, 76, nil, nil, 159, 130, nil, nil, + 174, 161, nil, 103, nil, nil, nil, nil, 88, nil, + nil, nil, nil, nil, nil, nil ] racc_reduce_table = [ 0, 0, :racc_error, - 0, 63, :_reduce_1, - 2, 63, :_reduce_2, - 0, 64, :_reduce_3, - 2, 64, :_reduce_4, - 1, 65, :_reduce_5, - 2, 65, :_reduce_6, - 0, 66, :_reduce_none, - 1, 66, :_reduce_none, - 5, 58, :_reduce_none, - 0, 67, :_reduce_10, - 0, 68, :_reduce_11, - 5, 59, :_reduce_12, - 2, 59, :_reduce_13, - 1, 73, :_reduce_14, - 2, 73, :_reduce_15, - 1, 60, :_reduce_none, - 2, 60, :_reduce_17, - 3, 60, :_reduce_18, - 2, 60, :_reduce_none, - 2, 60, :_reduce_20, - 2, 60, :_reduce_21, - 3, 60, :_reduce_22, - 2, 60, :_reduce_23, - 1, 60, :_reduce_24, - 1, 60, :_reduce_25, - 2, 60, :_reduce_none, - 1, 78, :_reduce_27, - 1, 78, :_reduce_28, - 1, 79, :_reduce_29, - 2, 79, :_reduce_30, - 2, 69, :_reduce_31, - 1, 69, :_reduce_none, - 1, 69, :_reduce_none, - 1, 69, :_reduce_none, - 3, 69, :_reduce_35, - 3, 69, :_reduce_36, - 3, 69, :_reduce_37, - 2, 69, :_reduce_38, - 2, 69, :_reduce_39, - 2, 69, :_reduce_40, - 2, 69, :_reduce_41, - 2, 69, :_reduce_42, - 2, 74, :_reduce_none, - 2, 74, :_reduce_44, - 2, 74, :_reduce_45, - 2, 74, :_reduce_46, - 2, 74, :_reduce_47, - 2, 74, :_reduce_48, - 2, 74, :_reduce_49, - 0, 84, :_reduce_none, - 1, 84, :_reduce_none, - 1, 85, :_reduce_52, - 2, 85, :_reduce_53, - 2, 80, :_reduce_54, - 3, 80, :_reduce_55, - 0, 88, :_reduce_none, - 1, 88, :_reduce_none, - 3, 83, :_reduce_58, - 8, 75, :_reduce_59, - 5, 76, :_reduce_60, - 8, 
76, :_reduce_61, - 1, 89, :_reduce_62, - 3, 89, :_reduce_63, - 1, 90, :_reduce_64, - 3, 90, :_reduce_65, - 0, 96, :_reduce_none, - 1, 96, :_reduce_none, - 0, 97, :_reduce_none, - 1, 97, :_reduce_none, - 1, 91, :_reduce_70, - 3, 91, :_reduce_71, - 3, 91, :_reduce_72, - 6, 91, :_reduce_73, - 3, 91, :_reduce_74, - 3, 91, :_reduce_75, - 0, 99, :_reduce_none, - 1, 99, :_reduce_none, - 1, 87, :_reduce_78, - 1, 100, :_reduce_79, - 2, 100, :_reduce_80, - 2, 81, :_reduce_81, - 3, 81, :_reduce_82, - 1, 77, :_reduce_none, - 1, 77, :_reduce_none, - 0, 101, :_reduce_85, - 0, 102, :_reduce_86, - 5, 72, :_reduce_87, - 1, 103, :_reduce_88, - 2, 103, :_reduce_89, - 1, 82, :_reduce_90, - 2, 82, :_reduce_91, - 3, 82, :_reduce_92, - 1, 86, :_reduce_93, - 1, 86, :_reduce_94, - 0, 105, :_reduce_none, - 1, 105, :_reduce_none, + 0, 64, :_reduce_1, + 2, 64, :_reduce_2, + 0, 65, :_reduce_3, + 2, 65, :_reduce_4, + 1, 66, :_reduce_5, + 2, 66, :_reduce_6, + 0, 67, :_reduce_none, + 1, 67, :_reduce_none, + 5, 59, :_reduce_none, + 0, 68, :_reduce_10, + 0, 69, :_reduce_11, + 5, 60, :_reduce_12, + 2, 60, :_reduce_13, + 1, 74, :_reduce_14, + 2, 74, :_reduce_15, + 1, 61, :_reduce_none, + 2, 61, :_reduce_17, + 3, 61, :_reduce_18, 2, 61, :_reduce_none, + 2, 61, :_reduce_20, + 2, 61, :_reduce_21, + 3, 61, :_reduce_22, + 2, 61, :_reduce_23, + 1, 61, :_reduce_24, + 1, 61, :_reduce_25, 2, 61, :_reduce_none, - 4, 104, :_reduce_99, - 1, 106, :_reduce_100, - 3, 106, :_reduce_101, - 1, 107, :_reduce_102, - 3, 107, :_reduce_103, - 5, 107, :_reduce_104, - 7, 107, :_reduce_105, - 4, 107, :_reduce_106, - 3, 107, :_reduce_107, - 1, 93, :_reduce_108, - 1, 93, :_reduce_109, - 1, 93, :_reduce_110, - 0, 108, :_reduce_none, - 1, 108, :_reduce_none, - 2, 94, :_reduce_113, - 3, 94, :_reduce_114, - 4, 94, :_reduce_115, - 0, 109, :_reduce_116, - 0, 110, :_reduce_117, - 5, 95, :_reduce_118, - 3, 92, :_reduce_119, - 0, 111, :_reduce_120, - 3, 62, :_reduce_121, + 1, 79, :_reduce_27, + 1, 79, :_reduce_28, + 1, 80, :_reduce_29, 
+ 2, 80, :_reduce_30, + 2, 70, :_reduce_31, 1, 70, :_reduce_none, - 0, 71, :_reduce_none, - 1, 71, :_reduce_none, - 1, 71, :_reduce_none, + 1, 70, :_reduce_none, + 1, 70, :_reduce_none, + 3, 70, :_reduce_35, + 3, 70, :_reduce_36, + 3, 70, :_reduce_37, + 2, 70, :_reduce_38, + 2, 70, :_reduce_39, + 2, 70, :_reduce_40, + 2, 70, :_reduce_41, + 2, 70, :_reduce_42, + 2, 75, :_reduce_none, + 2, 75, :_reduce_44, + 2, 75, :_reduce_45, + 2, 75, :_reduce_46, + 2, 75, :_reduce_47, + 2, 75, :_reduce_48, + 2, 75, :_reduce_49, + 2, 75, :_reduce_none, + 0, 86, :_reduce_none, + 1, 86, :_reduce_none, + 1, 87, :_reduce_53, + 2, 87, :_reduce_54, + 2, 81, :_reduce_55, + 3, 81, :_reduce_56, + 0, 90, :_reduce_none, + 1, 90, :_reduce_none, + 3, 85, :_reduce_59, + 1, 91, :_reduce_60, + 2, 91, :_reduce_61, + 4, 84, :_reduce_62, + 8, 76, :_reduce_63, + 5, 77, :_reduce_64, + 8, 77, :_reduce_65, + 1, 92, :_reduce_66, + 3, 92, :_reduce_67, + 1, 93, :_reduce_68, + 3, 93, :_reduce_69, + 0, 99, :_reduce_none, + 1, 99, :_reduce_none, + 0, 100, :_reduce_none, + 1, 100, :_reduce_none, + 1, 94, :_reduce_74, + 3, 94, :_reduce_75, + 3, 94, :_reduce_76, + 6, 94, :_reduce_77, + 3, 94, :_reduce_78, + 3, 94, :_reduce_79, + 0, 102, :_reduce_none, + 1, 102, :_reduce_none, + 1, 89, :_reduce_82, + 1, 103, :_reduce_83, + 2, 103, :_reduce_84, + 2, 82, :_reduce_85, + 3, 82, :_reduce_86, + 1, 78, :_reduce_none, + 1, 78, :_reduce_none, + 0, 104, :_reduce_89, + 0, 105, :_reduce_90, + 5, 73, :_reduce_91, + 1, 83, :_reduce_92, + 2, 83, :_reduce_93, + 3, 83, :_reduce_94, + 1, 88, :_reduce_95, + 1, 88, :_reduce_96, + 0, 107, :_reduce_none, + 1, 107, :_reduce_none, + 2, 62, :_reduce_none, + 2, 62, :_reduce_none, + 4, 106, :_reduce_101, + 1, 108, :_reduce_102, + 3, 108, :_reduce_103, + 1, 109, :_reduce_104, + 3, 109, :_reduce_105, + 5, 109, :_reduce_106, + 7, 109, :_reduce_107, + 4, 109, :_reduce_108, + 3, 109, :_reduce_109, + 1, 96, :_reduce_110, + 1, 96, :_reduce_111, + 1, 96, :_reduce_112, + 0, 110, :_reduce_none, + 1, 
110, :_reduce_none, + 2, 97, :_reduce_115, + 3, 97, :_reduce_116, + 4, 97, :_reduce_117, + 0, 111, :_reduce_118, + 0, 112, :_reduce_119, + 5, 98, :_reduce_120, + 3, 95, :_reduce_121, + 0, 113, :_reduce_122, + 3, 63, :_reduce_123, 1, 71, :_reduce_none, - 1, 98, :_reduce_127 ] + 0, 72, :_reduce_none, + 1, 72, :_reduce_none, + 1, 72, :_reduce_none, + 1, 72, :_reduce_none, + 1, 101, :_reduce_129 ] -racc_reduce_n = 128 +racc_reduce_n = 130 -racc_shift_n = 219 +racc_shift_n = 225 racc_token_table = { false => 0, @@ -1071,25 +1079,26 @@ def raise_parse_error(error_message, location) "%right" => 37, "%precedence" => 38, "%nonassoc" => 39, - "%rule" => 40, - "(" => 41, - ")" => 42, - ":" => 43, - "%inline" => 44, - "," => 45, - "|" => 46, - "%empty" => 47, - "%prec" => 48, - "{" => 49, - "}" => 50, - "?" => 51, - "+" => 52, - "*" => 53, - "[" => 54, - "]" => 55, - "{...}" => 56 } - -racc_nt_base = 57 + "%token-categories" => 40, + ":" => 41, + "%rule" => 42, + "(" => 43, + ")" => 44, + "%inline" => 45, + "," => 46, + "|" => 47, + "%empty" => 48, + "%prec" => 49, + "{" => 50, + "}" => 51, + "?" 
=> 52, + "+" => 53, + "*" => 54, + "[" => 55, + "]" => 56, + "{...}" => 57 } + +racc_nt_base = 58 racc_use_result_var = true @@ -1151,10 +1160,11 @@ def raise_parse_error(error_message, location) "\"%right\"", "\"%precedence\"", "\"%nonassoc\"", + "\"%token-categories\"", + "\":\"", "\"%rule\"", "\"(\"", "\")\"", - "\":\"", "\"%inline\"", "\",\"", "\"|\"", @@ -1194,12 +1204,14 @@ def raise_parse_error(error_message, location) "token_declarations", "symbol_declarations", "token_declarations_for_precedence", + "token_categories_declarations", "token_declaration", "\"-option@TAG\"", "\"-many1@token_declaration\"", "id", "alias", "\"-option@INTEGER\"", + "\"-many1@id\"", "rule_args", "rule_rhs_list", "rule_rhs", @@ -1214,7 +1226,6 @@ def raise_parse_error(error_message, location) "\"-many1@symbol\"", "@3", "@4", - "\"-many1@id\"", "rules", "\"-option@;\"", "rhs_list", @@ -1606,22 +1617,24 @@ def _reduce_49(val, _values, result) # reduce 51 omitted -module_eval(<<'.,.,', 'parser.y', 184) - def _reduce_52(val, _values, result) +# reduce 52 omitted + +module_eval(<<'.,.,', 'parser.y', 185) + def _reduce_53(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 184) - def _reduce_53(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 185) + def _reduce_54(val, _values, result) result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 172) - def _reduce_54(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 173) + def _reduce_55(val, _values, result) val[1].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: val[0], replace: true) } @@ -1630,8 +1643,8 @@ def _reduce_54(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 178) - def _reduce_55(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 179) + def _reduce_56(val, _values, result) val[2].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: val[1], replace: true) } @@ -1640,19 +1653,41 @@ def _reduce_55(val, _values, result) end .,., -# reduce 56 omitted - # reduce 57 omitted -module_eval(<<'.,.,', 'parser.y', 183) - def _reduce_58(val, _values, result) +# reduce 58 omitted + +module_eval(<<'.,.,', 'parser.y', 184) + def _reduce_59(val, _values, result) result = val result end .,., -module_eval(<<'.,.,', 'parser.y', 187) - def _reduce_59(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 192) + def _reduce_60(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 192) + def _reduce_61(val, _values, result) + result = val[1] ? 
val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 188) + def _reduce_62(val, _values, result) + @grammar.add_category(id: val[1], tokens: val[3], tag: val[0]) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 193) + def _reduce_63(val, _values, result) rule = Grammar::Parameterized::Rule.new(val[1].s_value, val[3], val[7], tag: val[5]) @grammar.add_parameterized_rule(rule) @@ -1660,8 +1695,8 @@ def _reduce_59(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 193) - def _reduce_60(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 199) + def _reduce_64(val, _values, result) rule = Grammar::Parameterized::Rule.new(val[2].s_value, [], val[4], is_inline: true) @grammar.add_parameterized_rule(rule) @@ -1669,8 +1704,8 @@ def _reduce_60(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 198) - def _reduce_61(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 204) + def _reduce_65(val, _values, result) rule = Grammar::Parameterized::Rule.new(val[2].s_value, val[4], val[7], is_inline: true) @grammar.add_parameterized_rule(rule) @@ -1678,22 +1713,22 @@ def _reduce_61(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 202) - def _reduce_62(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 208) + def _reduce_66(val, _values, result) result = [val[0]] result end .,., -module_eval(<<'.,.,', 'parser.y', 203) - def _reduce_63(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 209) + def _reduce_67(val, _values, result) result = val[0].append(val[2]) result end .,., -module_eval(<<'.,.,', 'parser.y', 207) - def _reduce_64(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 213) + def _reduce_68(val, _values, result) builder = val[0] result = [builder] @@ -1701,8 +1736,8 @@ def _reduce_64(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 212) - def _reduce_65(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 218) + def _reduce_69(val, 
_values, result) builder = val[2] result = val[0].append(builder) @@ -1710,16 +1745,16 @@ def _reduce_65(val, _values, result) end .,., -# reduce 66 omitted +# reduce 70 omitted -# reduce 67 omitted +# reduce 71 omitted -# reduce 68 omitted +# reduce 72 omitted -# reduce 69 omitted +# reduce 73 omitted -module_eval(<<'.,.,', 'parser.y', 218) - def _reduce_70(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 224) + def _reduce_74(val, _values, result) reset_precs result = Grammar::Parameterized::Rhs.new @@ -1727,8 +1762,8 @@ def _reduce_70(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 223) - def _reduce_71(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 229) + def _reduce_75(val, _values, result) token = val[1] token.alias_name = val[2] builder = val[0] @@ -1739,8 +1774,8 @@ def _reduce_71(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 231) - def _reduce_72(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 237) + def _reduce_76(val, _values, result) builder = val[0] builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]]) result = builder @@ -1749,8 +1784,8 @@ def _reduce_72(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 237) - def _reduce_73(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 243) + def _reduce_77(val, _values, result) builder = val[0] builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5]) result = builder @@ -1759,8 +1794,8 @@ def _reduce_73(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 243) - def _reduce_74(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 249) + def _reduce_78(val, _values, result) user_code = val[1] user_code.alias_name = val[2] builder = val[0] @@ -1771,8 +1806,8 @@ def _reduce_74(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 251) - def 
_reduce_75(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 257) + def _reduce_79(val, _values, result) sym = @grammar.find_symbol_by_id!(val[2]) @prec_seen = true builder = val[0] @@ -1783,33 +1818,33 @@ def _reduce_75(val, _values, result) end .,., -# reduce 76 omitted +# reduce 80 omitted -# reduce 77 omitted +# reduce 81 omitted -module_eval(<<'.,.,', 'parser.y', 258) - def _reduce_78(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 264) + def _reduce_82(val, _values, result) result = val[0].s_value if val[0] result end .,., -module_eval(<<'.,.,', 'parser.y', 271) - def _reduce_79(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 277) + def _reduce_83(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 271) - def _reduce_80(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 277) + def _reduce_84(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 262) - def _reduce_81(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 268) + def _reduce_85(val, _values, result) result = if val[0] [{tag: val[0], tokens: val[1]}] else @@ -1820,100 +1855,86 @@ def _reduce_81(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 268) - def _reduce_82(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 274) + def _reduce_86(val, _values, result) result = val[0].append({tag: val[1], tokens: val[2]}) result end .,., -# reduce 83 omitted +# reduce 87 omitted -# reduce 84 omitted +# reduce 88 omitted -module_eval(<<'.,.,', 'parser.y', 274) - def _reduce_85(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 280) + def _reduce_89(val, _values, result) begin_c_declaration("}") result end .,., -module_eval(<<'.,.,', 'parser.y', 278) - def _reduce_86(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 284) + def _reduce_90(val, _values, result) end_c_declaration result end .,., 
-module_eval(<<'.,.,', 'parser.y', 282) - def _reduce_87(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 288) + def _reduce_91(val, _values, result) result = val[2] result end .,., -module_eval(<<'.,.,', 'parser.y', 290) - def _reduce_88(val, _values, result) - result = val[1] ? val[1].unshift(val[0]) : val - result - end -.,., - -module_eval(<<'.,.,', 'parser.y', 290) - def _reduce_89(val, _values, result) - result = val[1] ? val[1].unshift(val[0]) : val - result - end -.,., - -module_eval(<<'.,.,', 'parser.y', 285) - def _reduce_90(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 291) + def _reduce_92(val, _values, result) result = [{tag: nil, tokens: val[0]}] result end .,., -module_eval(<<'.,.,', 'parser.y', 286) - def _reduce_91(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 292) + def _reduce_93(val, _values, result) result = [{tag: val[0], tokens: val[1]}] result end .,., -module_eval(<<'.,.,', 'parser.y', 287) - def _reduce_92(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 293) + def _reduce_94(val, _values, result) result = val[0].append({tag: val[1], tokens: val[2]}) result end .,., -module_eval(<<'.,.,', 'parser.y', 289) - def _reduce_93(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 295) + def _reduce_95(val, _values, result) on_action_error("ident after %prec", val[0]) if @prec_seen result end .,., -module_eval(<<'.,.,', 'parser.y', 290) - def _reduce_94(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 296) + def _reduce_96(val, _values, result) on_action_error("char after %prec", val[0]) if @prec_seen result end .,., -# reduce 95 omitted - -# reduce 96 omitted - # reduce 97 omitted # reduce 98 omitted -module_eval(<<'.,.,', 'parser.y', 298) - def _reduce_99(val, _values, result) +# reduce 99 omitted + +# reduce 100 omitted + +module_eval(<<'.,.,', 'parser.y', 304) + def _reduce_101(val, _values, result) lhs = val[0] lhs.alias_name = val[1] val[3].each do |builder| @@ -1926,8 +1947,8 @@ def 
_reduce_99(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 309) - def _reduce_100(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 315) + def _reduce_102(val, _values, result) builder = val[0] if !builder.line builder.line = @lexer.line - 1 @@ -1938,8 +1959,8 @@ def _reduce_100(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 317) - def _reduce_101(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 323) + def _reduce_103(val, _values, result) builder = val[2] if !builder.line builder.line = @lexer.line - 1 @@ -1950,8 +1971,8 @@ def _reduce_101(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 326) - def _reduce_102(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 332) + def _reduce_104(val, _values, result) reset_precs result = @grammar.create_rule_builder(@rule_counter, @midrule_action_counter) @@ -1959,8 +1980,8 @@ def _reduce_102(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 331) - def _reduce_103(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 337) + def _reduce_105(val, _values, result) token = val[1] token.alias_name = val[2] builder = val[0] @@ -1971,8 +1992,8 @@ def _reduce_103(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 339) - def _reduce_104(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 345) + def _reduce_106(val, _values, result) token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], alias_name: val[3], location: @lexer.location, args: [val[1]], lhs_tag: val[4]) builder = val[0] builder.add_rhs(token) @@ -1983,8 +2004,8 @@ def _reduce_104(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 347) - def _reduce_105(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 353) + def _reduce_107(val, _values, result) token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, alias_name: val[5], location: @lexer.location, args: val[3], lhs_tag: val[6]) builder = val[0] 
builder.add_rhs(token) @@ -1995,8 +2016,8 @@ def _reduce_105(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 355) - def _reduce_106(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 361) + def _reduce_108(val, _values, result) user_code = val[1] user_code.alias_name = val[2] user_code.tag = val[3] @@ -2008,8 +2029,8 @@ def _reduce_106(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 364) - def _reduce_107(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 370) + def _reduce_109(val, _values, result) sym = @grammar.find_symbol_by_id!(val[2]) @prec_seen = true builder = val[0] @@ -2020,33 +2041,33 @@ def _reduce_107(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 371) - def _reduce_108(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 377) + def _reduce_110(val, _values, result) result = "option" result end .,., -module_eval(<<'.,.,', 'parser.y', 372) - def _reduce_109(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 378) + def _reduce_111(val, _values, result) result = "nonempty_list" result end .,., -module_eval(<<'.,.,', 'parser.y', 373) - def _reduce_110(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 379) + def _reduce_112(val, _values, result) result = "list" result end .,., -# reduce 111 omitted +# reduce 113 omitted -# reduce 112 omitted +# reduce 114 omitted -module_eval(<<'.,.,', 'parser.y', 377) - def _reduce_113(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 383) + def _reduce_115(val, _values, result) result = if val[1] [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] else @@ -2057,22 +2078,22 @@ def _reduce_113(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 383) - def _reduce_114(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 389) + def _reduce_116(val, _values, result) result = val[0].append(val[2]) result end .,., -module_eval(<<'.,.,', 'parser.y', 384) - def 
_reduce_115(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 390) + def _reduce_117(val, _values, result) result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] result end .,., -module_eval(<<'.,.,', 'parser.y', 388) - def _reduce_116(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 394) + def _reduce_118(val, _values, result) if @prec_seen on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec @code_after_prec = true @@ -2083,31 +2104,31 @@ def _reduce_116(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 396) - def _reduce_117(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 402) + def _reduce_119(val, _values, result) end_c_declaration result end .,., -module_eval(<<'.,.,', 'parser.y', 400) - def _reduce_118(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 406) + def _reduce_120(val, _values, result) result = val[2] result end .,., -module_eval(<<'.,.,', 'parser.y', 403) - def _reduce_119(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 409) + def _reduce_121(val, _values, result) result = val[1].s_value result end .,., -module_eval(<<'.,.,', 'parser.y', 407) - def _reduce_120(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 413) + def _reduce_122(val, _values, result) begin_c_declaration('\Z') @grammar.epilogue_first_lineno = @lexer.line + 1 @@ -2115,8 +2136,8 @@ def _reduce_120(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 412) - def _reduce_121(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 418) + def _reduce_123(val, _values, result) end_c_declaration @grammar.epilogue = val[2].s_value @@ -2124,18 +2145,18 @@ def _reduce_121(val, _values, result) end .,., -# reduce 122 omitted - -# reduce 123 omitted - # reduce 124 omitted # reduce 125 omitted # reduce 126 omitted -module_eval(<<'.,.,', 'parser.y', 423) - def _reduce_127(val, _values, result) +# reduce 127 omitted + +# reduce 128 
omitted + +module_eval(<<'.,.,', 'parser.y', 429) + def _reduce_129(val, _values, result) result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) result end diff --git a/parser.y b/parser.y index 1109dbe1..99d0ac72 100644 --- a/parser.y +++ b/parser.y @@ -167,6 +167,7 @@ rule } @precedence_number += 1 } + | "%token-categories" token_categories_declarations token_declarations: TAG? token_declaration+ { @@ -183,6 +184,11 @@ rule token_declaration: id INTEGER? alias { result = val } + token_categories_declarations: TAG? IDENT_COLON ":" id+ + { + @grammar.add_category(id: val[1], tokens: val[3], tag: val[0]) + } + rule_declaration: "%rule" IDENTIFIER "(" rule_args ")" TAG? ":" rule_rhs_list { rule = Grammar::Parameterized::Rule.new(val[1].s_value, val[3], val[7], tag: val[5]) diff --git a/sig/generated/lrama/category.rbs b/sig/generated/lrama/category.rbs new file mode 100644 index 00000000..d037defa --- /dev/null +++ b/sig/generated/lrama/category.rbs @@ -0,0 +1,19 @@ +# Generated from lib/lrama/category.rb with RBS::Inline + +module Lrama + class Category + attr_reader id: Lrama::Lexer::Token::Ident + + attr_reader name: String + + attr_reader tokens: Array[Lrama::Lexer::Token::Ident] + + attr_reader tag: Lexer::Token::Tag + + # @rbs (id: Lrama::Lexer::Token::Ident) -> void + def initialize: (id: Lrama::Lexer::Token::Ident) -> void + + # @rbs (Array[Lrama::Lexer::Token::Ident] tokens, Lexer::Token::Tag tag) -> void + def add_tokens: (Array[Lrama::Lexer::Token::Ident] tokens, Lexer::Token::Tag tag) -> void + end +end diff --git a/sig/lrama/grammar.rbs b/sig/lrama/grammar.rbs index 7374db87..47879b14 100644 --- a/sig/lrama/grammar.rbs +++ b/sig/lrama/grammar.rbs @@ -21,6 +21,9 @@ module Lrama @aux: Auxiliary @no_stdlib: bool @locations: bool + @define: Hash[String, String] + @required: bool + @categories: Hash[String, Lrama::Category] @union: Union extend Forwardable @@ -63,6 +66,7 @@ module Lrama def add_precedence: (Grammar::Symbol sym, Integer precedence) -> 
Precedence def set_precedence: (Grammar::Symbol sym, Precedence precedence) -> (Precedence | bot) def set_union: (Grammar::Code::NoReferenceCode code, Integer lineno) -> Union + def add_category: (id: Lrama::Lexer::Token::Ident, tokens: Array[Lrama::Lexer::Token::Ident], tag: Lexer::Token::Tag) -> void def add_rule_builder: (RuleBuilder builder) -> Array[RuleBuilder] def add_parameterized_rule: (Parameterized::Rule rule) -> Array[Parameterized::Rule] def parameterized_rules: () -> Array[Parameterized::Rule] @@ -75,6 +79,7 @@ module Lrama def validate!: () -> void def find_rules_by_symbol!: (Grammar::Symbol sym) -> Array[Rule] def find_rules_by_symbol: (Grammar::Symbol sym) -> Array[Rule]? + def find_category: (String) -> Lrama::Category private @@ -84,6 +89,7 @@ module Lrama def setup_rules: () -> Array[RuleBuilder] def append_special_symbols: () -> Grammar::Symbol def normalize_rules: () -> Array[Rule] + def expand_categories: () -> void def collect_symbols: () -> Array[Lexer::Token] def set_lhs_and_rhs: () -> void def fill_default_precedence: () -> void diff --git a/spec/fixtures/categories/basic.y b/spec/fixtures/categories/basic.y new file mode 100644 index 00000000..5181d2e0 --- /dev/null +++ b/spec/fixtures/categories/basic.y @@ -0,0 +1,41 @@ +/* + * This is comment for this file. 
+ */ + +%{ +// Prologue +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int i; + char* s; +} + +%token integer number count +%token string identifier +%token-categories num: integer number count +%token-categories str: string identifier + +%% + +program : num + | str + ; + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) +{ + return 0; +} + +static int yyerror(YYLTYPE *loc, const char *str) +{ + return 0; +} + +int main(int argc, char *argv[]) +{ +} diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb index a328e017..57998872 100644 --- a/spec/lrama/parser_spec.rb +++ b/spec/lrama/parser_spec.rb @@ -2869,6 +2869,87 @@ end end + context "when categories" do + let(:grammar) do + grammar = Lrama::Parser.new(y, path).parse + grammar.prepare + grammar.validate! + grammar + end + let(:y) { File.read(fixture_path(path)) } + let(:path) { "categories/basic.y" } + + it "expands categories" do + expect(grammar.nterms.sort_by(&:number)).to match_symbols([ + Sym.new(id: T::Ident.new(s_value: "$accept"), alias_name: nil, number: 8, tag: nil, term: false, token_id: 0, nullable: false), + Sym.new(id: T::Ident.new(s_value: "program"), alias_name: nil, number: 9, tag: nil, term: false, token_id: 1, nullable: false), + Sym.new(id: T::Ident.new(s_value: "num"), alias_name: nil, number: 10, tag: nil, term: false, token_id: 2, nullable: false), + Sym.new(id: T::Ident.new(s_value: "str"), alias_name: nil, number: 11, tag: nil, term: false, token_id: 3, nullable: false), + ]) + + expect(grammar.rules).to eq([ + Rule.new( + id: 0, + lhs: grammar.find_symbol_by_s_value!("$accept"), + rhs: [ + grammar.find_symbol_by_s_value!("program"), + grammar.find_symbol_by_s_value!("YYEOF"), + ], + token_code: nil, + nullable: false, + precedence_sym: grammar.find_symbol_by_s_value!("YYEOF"), + lineno: 23, + ), + Rule.new( + id: 1, + lhs: grammar.find_symbol_by_s_value!("program"), + rhs: [ + 
grammar.find_symbol_by_s_value!("num"), + ], + token_code: nil, + nullable: false, + precedence_sym: nil, + lineno: 23, + ), + Rule.new( + id: 2, + lhs: grammar.find_symbol_by_s_value!("program"), + rhs: [ + grammar.find_symbol_by_s_value!("str"), + ], + token_code: nil, + nullable: false, + precedence_sym: nil, + lineno: 24, + ), + Rule.new( + id: 3, + lhs: grammar.find_symbol_by_s_value!("num"), + rhs: [ + grammar.find_symbol_by_s_value!("integer"), + grammar.find_symbol_by_s_value!("number"), + grammar.find_symbol_by_s_value!("count"), + ], + token_code: nil, + nullable: false, + precedence_sym: grammar.find_symbol_by_s_value!("count"), + lineno: 18, + ), + Rule.new( + id: 4, + lhs: grammar.find_symbol_by_s_value!("str"), + rhs: [ + grammar.find_symbol_by_s_value!("string"), + grammar.find_symbol_by_s_value!("identifier"), + ], + nullable: false, + precedence_sym: grammar.find_symbol_by_s_value!("identifier"), + lineno: 19, + ) + ]) + end + end + it "; for rules is optional" do y = header + <<~INPUT %%