8000 Improve using `//` in XPath performance by naitoh · Pull Request #249 · ruby/rexml · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Improve using // in XPath performance #249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions benchmark/xpath.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
loop_count: 100
contexts:
- gems:
rexml: 3.2.6
require: false
prelude: require 'rexml'
- name: master
prelude: |
$LOAD_PATH.unshift(File.expand_path("lib"))
require 'rexml'
- name: 3.2.6(YJIT)
gems:
rexml: 3.2.6
require: false
prelude: |
require 'rexml'
10000 RubyVM::YJIT.enable
- name: master(YJIT)
prelude: |
$LOAD_PATH.unshift(File.expand_path("lib"))
require 'rexml'
RubyVM::YJIT.enable

prelude: |
require 'rexml/document'

DEPTH = 100
xml = '<a>' * DEPTH + '</a>' * DEPTH
doc = REXML::Document.new(xml)

benchmark:
"REXML::XPath.match(REXML::Document.new(xml), 'a//a')" : REXML::XPath.match(doc, "a//a")
4 changes: 4 additions & 0 deletions lib/rexml/attribute.rb
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@ def xpath
path += "/@#{self.expanded_name}"
return path
end

def document
@element&.document
end
end
end
#vim:ts=2 sw=2 noexpandtab:
14 changes: 14 additions & 0 deletions lib/rexml/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,20 @@ def document
end

private

attr_accessor :namespaces_cache

# New document level cache is created and available in this block.
# This API is thread unsafe. Users can't change this document in this block.
def enable_cache
@namespaces_cache = {}
begin
yield
ensure
@namespaces_cache = nil
end
end

def build( source )
Parsers::TreeParser.new( source, self ).parse
end
Expand Down
33 changes: 17 additions & 16 deletions lib/rexml/element.rb
Original file line number Diff line number Diff line change
Expand Up @@ -589,10 +589,12 @@ def prefixes
# d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"}
#
def namespaces
namespaces = {}
namespaces = parent.namespaces if parent
namespaces = namespaces.merge( attributes.namespaces )
return namespaces
namespaces_cache = document&.__send__(:namespaces_cache)
if namespaces_cache
namespaces_cache[self] ||= calculate_namespaces
else
calculate_namespaces
end
end

# :call-seq:
Expand All @@ -619,17 +621,9 @@ def namespace(prefix=nil)
if prefix.nil?
prefix = prefix()
end
if prefix == ''
prefix = "xmlns"
else
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
end
ns = nil
target = self
while ns.nil? and target
ns = target.attributes[prefix]
target = target.parent
end
prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:")
ns = namespaces[prefix]

ns = '' if ns.nil? and prefix == 'xmlns'
return ns
end
Expand Down Expand Up @@ -1516,8 +1510,15 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
formatter.write( self, output )
end


private
def calculate_namespaces
if parent
parent.namespaces.merge(attributes.namespaces)
else
attributes.namespaces
end
end

def __to_xpath_helper node
rv = node.expanded_name.clone
if node.parent
Expand Down
27 changes: 12 additions & 15 deletions lib/rexml/xpath_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,15 @@ def variables=( vars={} )

def parse path, node
path_stack = @parser.parse( path )
match( path_stack, node )
if node.is_a?(Array)
Kernel.warn("REXML::XPath.each, REXML::XPath.first, REXML::XPath.match dropped support for nodeset...", uplevel: 1)
return [] if node.empty?
node = node.first
end

node.document.__send__(:enable_cache) do
match( path_stack, node )
end
end

def get_first path, node
Expand Down Expand Up @@ -137,11 +145,6 @@ def first( path_stack, node )


def match(path_stack, node)
if node.is_a?(Array)
Kernel.warn("REXML::XPath.each, REXML::XPath.first, REXML::XPath.match dropped support for nodeset...", uplevel: 1)
return [] if node.empty?
node = node.first
end
nodeset = [XPathNode.new(node, position: 1)]
result = expr(path_stack, nodeset)
case result
Expand Down Expand Up @@ -494,24 +497,18 @@ def node_test(path_stack, nodesets, any_type: :element)
if strict?
raw_node.name == name and raw_node.namespace == ""
else
# FIXME: This DOUBLES the time XPath searches take
ns = get_namespace(raw_node, prefix)
raw_node.name == name and raw_node.namespace == ns
raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
end
else
# FIXME: This DOUBLES the time XPath searches take
ns = get_namespace(raw_node, prefix)
raw_node.name == name and raw_node.namespace == ns
raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
end
when :attribute
if prefix.nil?
raw_node.name == name
elsif prefix.empty?
raw_node.name == name and raw_node.namespace == ""
else
# FIXME: This DOUBLES the time XPath searches take
ns = get_namespace(raw_node.element, prefix)
raw_node.name == name and raw_node.namespace == ns
raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix)
end
else
false
Expand Down
23 changes: 17 additions & 6 deletions test/test_core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -653,18 +653,23 @@ def test_namespace
assert_equal "<sean:blah>Some text</sean:blah>", out
end


def test_add_namespace
e = Element.new 'a'
assert_equal("", e.namespace)
assert_nil(e.namespace('foo'))
e.add_namespace 'someuri'
e.add_namespace 'foo', 'otheruri'
e.add_namespace 'xmlns:bar', 'thirduri'
assert_equal 'someuri', e.attributes['xmlns']
assert_equal 'otheruri', e.attributes['xmlns:foo']
assert_equal 'thirduri', e.attributes['xmlns:bar']
assert_equal("someuri", e.namespace)
assert_equal("otheruri", e.namespace('foo'))
assert_equal("otheruri", e.namespace('xmlns:foo'))
assert_equal("thirduri", e.namespace('bar'))
assert_equal("thirduri", e.namespace('xmlns:bar'))
assert_equal('someuri', e.attributes['xmlns'])
assert_equal('otheruri', e.attributes['xmlns:foo'])
assert_equal('thirduri', e.attributes['xmlns:bar'])
end


def test_big_documentation
d = File.open(fixture_path("documentation.xml")) {|f| Document.new f }
assert_equal "Sean Russell", d.elements["documentation/head/author"].text.tr("\n\t", " ").squeeze(" ")
Expand Down Expand Up @@ -764,9 +769,15 @@ def test_attributes_each

def test_delete_namespace
doc = Document.new "<a xmlns='1' xmlns:x='2'/>"
assert_equal("1", doc.root.namespace)
assert_equal("2", doc.root.namespace('x'))
assert_equal("2", doc.root.namespace('xmlns:x'))
doc.root.delete_namespace
doc.root.delete_namespace 'x'
assert_equal "<a/>", doc.to_s
assert_equal("<a/>", doc.to_s)
assert_equal("", doc.root.namespace)
assert_nil(doc.root.namespace('x'))
assert_nil(doc.root.namespace('xmlns:x'))
end

def test_each_element_with_attribute
Expand Down
10 changes: 10 additions & 0 deletions test/xpath/test_base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,16 @@ def test_namespaces_0
assert_equal( 1, XPath.match( d, "//x:*" ).size )
end

def test_namespaces_cache
doc = Document.new("<a xmlns='1'><b/></a>")
assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='1']").to_s)
assert_nil(XPath.first(doc, "//b[namespace-uri()='']"))

doc.root.delete_namespace
assert_nil(XPath.first(doc, "//b[namespace-uri()='1']"))
assert_equal("<b/>", XPath.first(doc, "//b[namespace-uri()='']").to_s)
end

def test_ticket_71
doc = Document.new(%Q{<root xmlns:ns1="xyz" xmlns:ns2="123"><element ns1:attrname="foo" ns2:attrname="bar"/></root>})
el = doc.root.elements[1]
Expand Down
Loading
0