diff --git a/Gemfile.lock b/Gemfile.lock index f193507..019479a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -28,79 +28,83 @@ GEM securerandom (>= 0.3) tzinfo (~> 2.0, >= 2.0.5) uri (>= 0.13.1) - addressable (2.8.7) - public_suffix (>= 2.0.2, < 7.0) + addressable (2.9.0) + public_suffix (>= 2.0.2, < 8.0) ast (2.4.3) base64 (0.3.0) benchmark (0.5.0) - bigdecimal (3.3.1) + bigdecimal (4.1.2) coderay (1.1.3) - concurrent-ruby (1.3.5) - connection_pool (2.5.4) + concurrent-ruby (1.3.6) + connection_pool (3.0.2) docile (1.4.1) drb (2.2.3) - excon (1.3.1) + excon (1.4.2) logger - faraday (2.14.0) + faraday (2.14.1) faraday-net_http (>= 2.0, < 3.5) json logger faraday-excon (2.4.0) excon (>= 1.0.0) faraday (>= 2.11.0, < 3) - faraday-follow_redirects (0.4.0) + faraday-follow_redirects (0.5.0) faraday (>= 1, < 3) - faraday-multipart (1.1.1) + faraday-multipart (1.2.0) multipart-post (~> 2.0) faraday-net_http (3.4.2) net-http (~> 0.5) - i18n (1.14.7) + i18n (1.14.8) concurrent-ruby (~> 1.0) - json (2.16.0) + io-console (0.8.2) + json (2.19.4) language_server-protocol (3.17.0.5) lint_roller (1.1.0) logger (1.7.0) lz4-ruby (0.3.3) method_source (1.1.0) - minitest (5.26.2) - minitest-hooks (1.5.2) + minitest (5.27.0) + minitest-hooks (1.5.3) minitest (> 5.3) - multi_json (1.17.0) + multi_json (1.20.1) multipart-post (2.4.1) - net-http (0.8.0) + net-http (0.9.1) uri (>= 0.11.1) - oj (3.16.12) + oj (3.17.0) bigdecimal (>= 3.0) ostruct (>= 0.2) ostruct (0.6.3) - parallel (1.27.0) - parser (3.3.10.0) + parallel (1.28.0) + parser (3.3.11.1) ast (~> 2.4.1) racc - prism (1.6.0) - pry (0.15.2) + prism (1.9.0) + pry (0.16.0) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (6.0.2) + reline (>= 0.6.0) + public_suffix (7.0.5) racc (1.8.1) rainbow (3.1.1) - rake (13.3.1) - regexp_parser (2.11.3) + rake (13.4.2) + regexp_parser (2.12.0) + reline (0.6.3) + io-console (~> 0.5) rexml (3.4.4) - rubocop (1.81.7) + rubocop (1.86.1) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) - parallel (~> 1.10) + parallel (>= 1.10) parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 2.9.3, < 3.0) - rubocop-ast (>= 1.47.1, < 2.0) + rubocop-ast (>= 1.49.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.48.0) + rubocop-ast (1.49.1) parser (>= 3.3.7.2) - prism (~> 1.4) + prism (~> 1.7) ruby-progressbar (1.13.0) securerandom (0.4.1) simplecov (0.22.0) @@ -116,11 +120,11 @@ GEM concurrent-ruby (~> 1.0) unicode-display_width (3.2.0) unicode-emoji (~> 4.1) - unicode-emoji (4.1.0) + unicode-emoji (4.2.0) uri (1.1.1) PLATFORMS - arm64-darwin-24 + arm64-darwin-23 ruby DEPENDENCIES @@ -134,5 +138,63 @@ DEPENDENCIES simplecov simplecov-cobertura +CHECKSUMS + activesupport (8.0.3) sha256=a711ce5e30660b23232f26a38699469f8d859d47aa1f722e183fda6d7cc17823 + addressable (2.9.0) sha256=7fdf6ac3660f7f4e867a0838be3f6cf722ace541dd97767fa42bc6cfa980c7af + ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383 + base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b + benchmark (0.5.0) sha256=465df122341aedcb81a2a24b4d3bd19b6c67c1530713fd533f3ff034e419236c + bigdecimal (4.1.2) sha256=53d217666027eab4280346fba98e7d5b66baaae1b9c3c1c0ffe89d48188a3fbd + coderay (1.1.3) sha256=dc530018a4684512f8f38143cd2a096c9f02a1fc2459edcfe534787a7fc77d4b + concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab + connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a + docile (1.4.1) sha256=96159be799bfa73cdb721b840e9802126e4e03dfc26863db73647204c727f21e + drb (2.2.3) sha256=0b00d6fdb50995fe4a45dea13663493c841112e4068656854646f418fda13373 + excon (1.4.2) sha256=32d8d8eda619717d9b8043b4675e096fb5c2139b080e2ad3b267f88c545aaa35 + faraday (2.14.1) sha256=a43cceedc1e39d188f4d2cdd360a8aaa6a11da0c407052e426ba8d3fb42ef61c + faraday-excon (2.4.0) sha256=2a07ff3583468468eb62325c6263d0b2dd39282a8de7dc35908d782de531c8f6 + faraday-follow_redirects (0.5.0) sha256=5cde93c894b30943a5d2b93c2fe9284216a6b756f7af406a1e55f211d97d10ad + faraday-multipart (1.2.0) sha256=7d89a949693714176f612323ca13746a2ded204031a6ba528adee788694ef757 + faraday-net_http (3.4.2) sha256=f147758260d3526939bf57ecf911682f94926a3666502e24c69992765875906c + i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5 + io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc + json (2.19.4) sha256=670a7d333fb3b18ca5b29cb255eb7bef099e40d88c02c80bd42a3f30fe5239ac + language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc + lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87 + logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203 + lz4-ruby (0.3.3) sha256=011be5ee230cfddc8308d4e2e0b05300c7bc755a887de799377ca6c5b6aede89 + method_source (1.1.0) sha256=181301c9c45b731b4769bc81e8860e72f9161ad7d66dd99103c9ab84f560f5c5 + minitest (5.27.0) sha256=2d3b17f8a36fe7801c1adcffdbc38233b938eb0b4966e97a6739055a45fa77d5 + minitest-hooks (1.5.3) sha256=ef50dd3bf47e6d1646befc358c640c71ca41f5650f0036b4c69929a44d6f32c4 + multi_json (1.20.1) sha256=2f3934e805cc45ef91b551a1f89d0e9191abd06a5e04a2ef09a6a036c452ca6d + multipart-post (2.4.1) sha256=9872d03a8e552020ca096adadbf5e3cb1cd1cdd6acd3c161136b8a5737cdb4a8 + net-http (0.9.1) sha256=25ba0b67c63e89df626ed8fac771d0ad24ad151a858af2cc8e6a716ca4336996 + oj (3.17.0) sha256=5684b2127fb70e650fae90df521b91336ff8e55e2e1011ed80eb0283beac5360 + ontologies_api_client (2.8.1) + ostruct (0.6.3) sha256=95a2ed4a4bd1d190784e666b47b2d3f078e4a9efda2fccf18f84ddc6538ed912 + parallel (1.28.0) sha256=33e6de1484baf2524792d178b0913fc8eb94c628d6cfe45599ad4458c638c970 + parser (3.3.11.1) sha256=d17ace7aabe3e72c3cc94043714be27cc6f852f104d81aa284c2281aecc65d54 + prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85 + pry (0.16.0) sha256=d76c69065698ed1f85e717bd33d7942c38a50868f6b0673c636192b3d1b6054e + public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623 + racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f + rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a + rake (13.4.2) sha256=cb825b2bd5f1f8e91ca37bddb4b9aaf345551b4731da62949be002fa89283701 + regexp_parser (2.12.0) sha256=35a916a1d63190ab5c9009457136ae5f3c0c7512d60291d0d1378ba18ce08ebb + reline (0.6.3) sha256=1198b04973565b36ec0f11542ab3f5cfeeec34823f4e54cebde90968092b1835 + rexml (3.4.4) sha256=19e0a2c3425dfbf2d4fc1189747bdb2f849b6c5e74180401b15734bc97b5d142 + rubocop (1.86.1) sha256=44415f3f01d01a21e01132248d2fd0867572475b566ca188a0a42133a08d4531 + rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035 + ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33 + securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1 + simplecov (0.22.0) sha256=fe2622c7834ff23b98066bb0a854284b2729a569ac659f82621fc22ef36213a5 + simplecov-cobertura (3.1.0) sha256=6d7f38aa32c965ca2174b2e5bd88cb17138eaf629518854976ac50e628925dc5 + simplecov-html (0.13.2) sha256=bd0b8e54e7c2d7685927e8d6286466359b6f16b18cb0df47b508e8d73c777246 + simplecov_json_formatter (0.1.4) sha256=529418fbe8de1713ac2b2d612aa3daa56d316975d307244399fa4838c601b428 + tzinfo (2.0.6) sha256=8daf828cc77bcf7d63b0e3bdb6caa47e2272dcfaf4fbfe46f8c3a9df087a829b + unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42 + unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f + uri (1.1.1) sha256=379fa58d27ffb1387eaada68c749d1426738bd0f654d812fcc07e7568f5c57c6 + BUNDLED WITH - 2.7.2 + 4.0.7 diff --git a/lib/ontologies_api_client/collection.rb b/lib/ontologies_api_client/collection.rb index add8ce4..dc29cf8 100644 --- a/lib/ontologies_api_client/collection.rb +++ b/lib/ontologies_api_client/collection.rb @@ -10,6 +10,8 @@ def self.included(base) end module ClassMethods + PAGED_COLLECTION_SIZE = 5_000 + ## # Allows for arbitrary find_by methods. For example: # Ontology.find_by_acronym("BRO") @@ -54,7 +56,12 @@ def collection_path # Get all resources from the base collection for a resource def all(*args) params = args.shift || {} - entry_point(@media_type, params) + request_params = collection_request_params(params) + response = entry_point(@media_type, request_params) + + return response if page_requested?(params) || !paged_collection?(response) + + all_pages(response, request_params) end ## @@ -114,6 +121,49 @@ def find_by(attrs, *args) bools.all? end end + + private + + def all_pages(first_page, params) + collection = Array(first_page.collection) + next_page = first_page.nextPage + + while next_page + page = entry_point(@media_type, next_page_params(params, next_page)) + collection.concat(Array(page.collection)) + next_page = page.nextPage + end + + collection + end + + def collection_request_params(params) + return params if page_requested?(params) || page_size_requested?(params) + + params.merge(pagesize: PAGED_COLLECTION_SIZE) + end + + def next_page_params(params, page) + page_params = params.dup + page_params[page_params.key?("page") ? "page" : :page] = page + page_params[:pagesize] = PAGED_COLLECTION_SIZE unless page_size_requested?(page_params) + page_params + end + + def page_requested?(params) + params.key?(:page) || params.key?("page") + end + + def page_size_requested?(params) + params.key?(:pagesize) || params.key?("pagesize") + end + + def paged_collection?(response) + response.respond_to?(:collection) && + response.respond_to?(:pageCount) && + response.respond_to?(:nextPage) && + response.collection.is_a?(Array) + end end end end diff --git a/lib/ontologies_api_client/models/user.rb b/lib/ontologies_api_client/models/user.rb index e0d9fbf..0742db7 100644 --- a/lib/ontologies_api_client/models/user.rb +++ b/lib/ontologies_api_client/models/user.rb @@ -9,7 +9,12 @@ class User < LinkedData::Client::Base include LinkedData::Client::ReadWrite @media_type = "http://data.bioontology.org/metadata/User" - @include_attrs = "all" + @include_attrs = "username,email,role,firstName,lastName,created" + + def self.all(*args) + params = args.shift || {} + super({ display_context: false, display_links: false }.merge(params), *args) + end def self.authenticate(user, password) auth_params = {user: user, password: password, include: "all"} diff --git a/test/benchmark/users.rb b/test/benchmark/users.rb new file mode 100644 index 0000000..283a2dc --- /dev/null +++ b/test/benchmark/users.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require 'benchmark' +require 'json' +require 'net/http' +require 'optparse' +require 'uri' + +$stdout.sync = true + +ENV['UT_APIKEY'] ||= 'manual-timing' + +require_relative '../../lib/ontologies_api_client' +require_relative '../../config/config' + +options = { pagesize: 5_000 } +OptionParser.new do |opts| + opts.banner = 'Usage: ruby test/benchmark/users.rb [--page=N] [--pagesize=M]' + opts.on('--page=N', Integer, 'Single-page mode: fetch one page (skips all-pages walk)') { |v| options[:page] = v } + opts.on('--pagesize=M', Integer, "Page size (default: #{options[:pagesize]})") { |v| options[:pagesize] = v } +end.parse! + +def users_uri(params = {}) + uri = URI.join(LinkedData::Client.settings.rest_url.chomp('/') + '/', 'users') + uri.query = URI.encode_www_form(params) + uri +end + +def get_json(uri) + request = Net::HTTP::Get.new(uri) + request['Accept'] = 'application/json' + request['Authorization'] = "apikey token=#{LinkedData::Client.settings.apikey}" + request['User-Agent'] = "NCBO API Ruby Client v#{LinkedData::Client::VERSION}" + + Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http| + http.request(request) + end +end + +def summarize_json(body) + parsed = JSON.parse(body) + + if parsed.is_a?(Array) + { shape: 'array', count: parsed.length } + elsif parsed.is_a?(Hash) && parsed['collection'].is_a?(Array) + { + shape: 'page', + count: parsed['collection'].length, + page: parsed['page'], + page_count: parsed['pageCount'], + total_count: parsed['totalCount'], + next_page: parsed['nextPage'] + } + else + { shape: parsed.class.name } + end +rescue JSON::ParserError => e + { shape: 'unparseable', error: e.message } +end + +def time_raw(label, params) + uri = users_uri(params) + response = nil + + elapsed = Benchmark.realtime { response = get_json(uri) } + + summary = summarize_json(response.body) + puts "\n#{label}" + puts " uri: #{uri}" + puts " status: #{response.code}" + puts " elapsed: #{format('%.3f', elapsed)}s" + puts " bytes: #{response.body.bytesize}" + puts " summary: #{summary}" +end + +def time_raw_users_include_all + time_raw('Raw GET /users?include=all (no pagination params)', include: 'all') +end + +def time_raw_users_paged(page:, pagesize:) + time_raw("Raw GET /users (page=#{page}, pagesize=#{pagesize}, include=all)", + page: page, pagesize: pagesize, include: 'all') +end + +def time_user_all + users = nil + + elapsed = Benchmark.realtime do + users = LinkedData::Client::Models::User.all + end + + puts "\nLinkedData::Client::Models::User.all (auto-paginate, walks all pages)" + puts " elapsed: #{format('%.3f', elapsed)}s" + puts " class: #{users.class}" + puts " count: #{users.respond_to?(:length) ? users.length : 'n/a'}" + puts " first: #{users.first.username if users.respond_to?(:first) && users.first.respond_to?(:username)}" +end + +def time_user_single_page(page:, pagesize:) + result = nil + + elapsed = Benchmark.realtime do + result = LinkedData::Client::Models::User.all(page: page, pagesize: pagesize) + end + + puts "\nLinkedData::Client::Models::User.all(page: #{page}, pagesize: #{pagesize})" + puts " elapsed: #{format('%.3f', elapsed)}s" + puts " class: #{result.class}" + if result.respond_to?(:collection) + puts " page: #{result.page} / #{result.pageCount}" + puts " total_count: #{result.totalCount}" + puts " collection.length: #{result.collection.length}" + puts " next_page: #{result.nextPage.inspect}" + first = result.collection.first + puts " first: #{first.username if first.respond_to?(:username)}" + else + puts " (response was not paged) preview: #{result.inspect[0, 200]}" + end +end + +puts "REST URL: #{LinkedData::Client.settings.rest_url}" + +if options[:page] + time_raw_users_paged(page: options[:page], pagesize: options[:pagesize]) + time_user_single_page(page: options[:page], pagesize: options[:pagesize]) +else + time_raw_users_include_all + time_user_all +end diff --git a/test/models/test_collection.rb b/test/models/test_collection.rb index d97cd79..5da3a75 100644 --- a/test/models/test_collection.rb +++ b/test/models/test_collection.rb @@ -17,6 +17,70 @@ def test_all assert onts.length > 350 end + def test_all_flattens_paged_collections + calls = [] + pages = { + 1 => OpenStruct.new(collection: %w[a b], pageCount: 2, nextPage: 2), + 2 => OpenStruct.new(collection: %w[c], pageCount: 2, nextPage: nil) + } + + TestOntology.stub(:entry_point, ->(_media_type, params) { + calls << params + pages.fetch(params[:page] || 1) + }) do + assert_equal %w[a b c], TestOntology.all + end + + assert_equal [{ pagesize: 5_000 }, { pagesize: 5_000, page: 2 }], calls + end + + def test_all_returns_page_when_page_requested + requested_page = OpenStruct.new(collection: %w[a b], pageCount: 2, nextPage: 2) + + TestOntology.stub(:entry_point, ->(_media_type, _params) { requested_page }) do + assert_same requested_page, TestOntology.all(page: 1) + end + end + + # Back-compat invariant: when an endpoint returns a flat Array (i.e. it + # hasn't been paginated yet — e.g. /ontologies, /groups, /categories + # today), Collection#all must pass the response through unchanged. This + # is what keeps non-paginated endpoints working after this gem's + # auto-flatten change. Guards against future refactors of the page-walk + # logic accidentally breaking the non-paged path; complements the + # network-dependent `test_all` which would only catch the regression + # against a live API. + def test_all_passes_through_array_response + array_response = [OpenStruct.new(id: 'a'), OpenStruct.new(id: 'b'), OpenStruct.new(id: 'c')] + calls = [] + + TestOntology.stub(:entry_point, ->(_media_type, params) { + calls << params + array_response + }) do + assert_same array_response, TestOntology.all + end + + # Sanity: pagesize is still injected on the request — the API simply + # ignores it and returns an Array, which we hand back as-is. + assert_equal [{ pagesize: 5_000 }], calls + end + + def test_user_all_uses_lightweight_defaults + calls = [] + + LinkedData::Client::Models::User.stub(:entry_point, ->(_media_type, params) { + calls << params + [] + }) do + assert_equal [], LinkedData::Client::Models::User.all + end + + assert_equal "username,email,role,firstName,lastName,created", LinkedData::Client::Models::User.include_attrs + assert_equal false, calls.first[:display_context] + assert_equal false, calls.first[:display_links] + end + def test_class_for_type media_type = 'http://data.bioontology.org/metadata/Category' type_cls = LinkedData::Client::Base.class_for_type(media_type)