diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index a6d76fdc..ba8ca2e7 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -19,9 +19,7 @@ def namespaces=( namespaces ) end def parse path - path = path.dup - path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces - path.gsub!( /\s+([\]\)])/, '\1') + path = strip_ignorable_spaces(path) parsed = [] rest = OrExpr(path, parsed) if rest @@ -33,6 +31,20 @@ def parse path parsed end + def strip_ignorable_spaces(path) + # Safely do `path.gsub(/([\(\[])\s+/, '\1').gsub( /\s+([\]\)])/, '\1')` + # without modifying spaces inside string literals. + quote = nil + path.gsub(/([\(\[])\s+|\s+([\]\)])|(['"])/) do + if quote + quote = nil if $3 == quote + $& + else + $1 || $2 || (quote = $3) + end + end + end + def predicate path parsed = [] Predicate( "[#{path}]", parsed ) @@ -678,12 +690,20 @@ def get_group string depth = 0 st = string[0,1] en = (st == "(" ? ")" : "]") + quote = nil begin - case string[ind,1] - when st - depth += 1 - when en - depth -= 1 + if quote + # ignore () [] inside quotes + quote = nil if string[ind] == quote + else + case string[ind] + when st + depth += 1 + when en + depth -= 1 + when '"', "'" + quote = string[ind] + end end ind += 1 end while depth > 0 and ind < string.length diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 1c6eb624..0013e07e 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -803,6 +803,36 @@ def test_spaces match.call('/ a / child:: c [( @id )] /')) end + def test_space_inside_xpath + parser = Parsers::XPathParser.new + assert_equal( + parser.parse('/a/b[string-length("1")<(2+3)]/c'), + parser.parse(' / a / b [ string-length( "1" ) < ( 2 + 3 ) ] / c '), + ) + assert_equal( + parser.parse('//processing-instruction("a")'), + parser.parse('//processing-instruction( "a" )'), + ) + end + + def test_space_paren_brace_inside_xpath_string + doc = Document.new(<<~XML) + + + + + XML + + assert_equal( + [" [ ' 1 ) "], + REXML::XPath.match(doc, "/a/b[@id=\" [ ' 1 ) \"]").map { |e| e.attributes['id'] } + ) + assert_equal( + [' ( " 2 ] '], + REXML::XPath.match(doc, "/a/b[@id=' ( \" 2 ] ']").map { |e| e.attributes['id'] } + ) + end + def test_text_nodes # source = " #