From b6caffc7e301950eb6762630c5f5972ef4a81e91 Mon Sep 17 00:00:00 2001 From: mdorf Date: Thu, 18 Jul 2024 10:54:07 -0700 Subject: [PATCH 1/5] initial develop branch commit --- Gemfile.lock | 54 +++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index a8b0b7c..8ba832f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -21,15 +21,15 @@ GEM i18n (>= 1.6, < 2) minitest (>= 5.1) tzinfo (~> 2.0) - addressable (2.8.6) - public_suffix (>= 2.0.2, < 6.0) + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) ast (2.4.2) base64 (0.2.0) - bigdecimal (3.1.5) + bigdecimal (3.1.8) coderay (1.1.3) - concurrent-ruby (1.2.2) - excon (0.109.0) - faraday (2.7.12) + concurrent-ruby (1.3.3) + excon (0.111.0) + faraday (2.8.1) base64 faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) @@ -41,48 +41,50 @@ GEM faraday-multipart (1.0.4) multipart-post (~> 2) faraday-net_http (3.0.2) - i18n (1.14.1) + i18n (1.14.5) concurrent-ruby (~> 1.0) - json (2.6.3) + json (2.7.2) language_server-protocol (3.17.0.3) lz4-ruby (0.3.3) - method_source (1.0.0) - minitest (5.20.0) + method_source (1.1.0) + minitest (5.24.1) minitest-hooks (1.5.1) minitest (> 5.3) multi_json (1.15.0) - multipart-post (2.3.0) - oj (3.16.3) + multipart-post (2.4.1) + oj (3.16.4) bigdecimal (>= 3.0) - parallel (1.23.0) - parser (3.2.2.4) + parallel (1.25.1) + parser (3.3.4.0) ast (~> 2.4.1) racc pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.0.4) - racc (1.7.3) + public_suffix (5.1.1) + racc (1.8.0) rainbow (3.1.1) - rake (13.1.0) - regexp_parser (2.8.2) - rexml (3.2.6) - rubocop (1.57.2) + rake (13.2.1) + regexp_parser (2.9.2) + rexml (3.3.2) + strscan + rubocop (1.65.0) json (~> 2.3) language_server-protocol (>= 3.17.0) parallel (~> 1.10) - parser (>= 3.2.2.4) + parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) - regexp_parser (>= 1.8, < 3.0) + regexp_parser (>= 2.4, < 3.0) rexml (>= 3.2.5, < 4.0) - 
rubocop-ast (>= 1.28.1, < 2.0) + rubocop-ast (>= 1.31.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 3.0) - rubocop-ast (1.30.0) - parser (>= 3.2.1.0) + rubocop-ast (1.31.3) + parser (>= 3.3.1.0) ruby-progressbar (1.13.0) ruby2_keywords (0.0.5) spawnling (2.1.5) + strscan (3.1.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.5.0) @@ -103,4 +105,4 @@ DEPENDENCIES rubocop (~> 1.43) BUNDLED WITH - 2.3.22 + 2.4.22 From d47734e2715451bb2cd4a8fdcf74f8a20d38d958 Mon Sep 17 00:00:00 2001 From: mdorf Date: Thu, 18 Jul 2024 13:26:29 -0700 Subject: [PATCH 2/5] GF --- Gemfile.lock | 54 +++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index a8b0b7c..8ba832f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -21,15 +21,15 @@ GEM i18n (>= 1.6, < 2) minitest (>= 5.1) tzinfo (~> 2.0) - addressable (2.8.6) - public_suffix (>= 2.0.2, < 6.0) + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) ast (2.4.2) base64 (0.2.0) - bigdecimal (3.1.5) + bigdecimal (3.1.8) coderay (1.1.3) - concurrent-ruby (1.2.2) - excon (0.109.0) - faraday (2.7.12) + concurrent-ruby (1.3.3) + excon (0.111.0) + faraday (2.8.1) base64 faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) @@ -41,48 +41,50 @@ GEM faraday-multipart (1.0.4) multipart-post (~> 2) faraday-net_http (3.0.2) - i18n (1.14.1) + i18n (1.14.5) concurrent-ruby (~> 1.0) - json (2.6.3) + json (2.7.2) language_server-protocol (3.17.0.3) lz4-ruby (0.3.3) - method_source (1.0.0) - minitest (5.20.0) + method_source (1.1.0) + minitest (5.24.1) minitest-hooks (1.5.1) minitest (> 5.3) multi_json (1.15.0) - multipart-post (2.3.0) - oj (3.16.3) + multipart-post (2.4.1) + oj (3.16.4) bigdecimal (>= 3.0) - parallel (1.23.0) - parser (3.2.2.4) + parallel (1.25.1) + parser (3.3.4.0) ast (~> 2.4.1) racc pry (0.14.2) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.0.4) - racc (1.7.3) + public_suffix (5.1.1) + 
racc (1.8.0) rainbow (3.1.1) - rake (13.1.0) - regexp_parser (2.8.2) - rexml (3.2.6) - rubocop (1.57.2) + rake (13.2.1) + regexp_parser (2.9.2) + rexml (3.3.2) + strscan + rubocop (1.65.0) json (~> 2.3) language_server-protocol (>= 3.17.0) parallel (~> 1.10) - parser (>= 3.2.2.4) + parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) - regexp_parser (>= 1.8, < 3.0) + regexp_parser (>= 2.4, < 3.0) rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.28.1, < 2.0) + rubocop-ast (>= 1.31.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 3.0) - rubocop-ast (1.30.0) - parser (>= 3.2.1.0) + rubocop-ast (1.31.3) + parser (>= 3.3.1.0) ruby-progressbar (1.13.0) ruby2_keywords (0.0.5) spawnling (2.1.5) + strscan (3.1.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.5.0) @@ -103,4 +105,4 @@ DEPENDENCIES rubocop (~> 1.43) BUNDLED WITH - 2.3.22 + 2.4.22 From 5b86b8891224b9971b149228c574ab72c5fc91a7 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Tue, 28 Apr 2026 16:57:11 -0700 Subject: [PATCH 3/5] auto-flatten paged /users responses; slim User attrs Pairs with the ontologies_api /users pagination change. Two changes that together let existing callers of `LinkedData::Client::Models::User.all` keep working without code changes after the API switches to mandatory pagination. Collection#all detects paged responses (collection/pageCount/nextPage shape) and walks `nextPage` links transparently, returning a flat Array. Falls through to existing behavior when the response is already an Array (pre-pagination back-compat) or when the caller explicitly requested a `:page` (passthrough). Internal pagesize: 5_000 to minimize request count. Models::User defaults to a slim `@include_attrs` allow-list (username, email, role, firstName, lastName, created) instead of "all", and overrides `.all` to inject `display_context: false, display_links: false`. 
Drops the heavy User payload (custom ontologies, tokens, hashes, JSON-LD context, HATEOAS links) for list/select use cases. Adds Collection tests for paged-flatten, page-passthrough, and User.all slim-defaults. --- lib/ontologies_api_client/collection.rb | 52 +++++++++++++++++++++++- lib/ontologies_api_client/models/user.rb | 7 +++- test/models/test_collection.rb | 40 ++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) diff --git a/lib/ontologies_api_client/collection.rb b/lib/ontologies_api_client/collection.rb index 97ccc0f..4245735 100644 --- a/lib/ontologies_api_client/collection.rb +++ b/lib/ontologies_api_client/collection.rb @@ -10,6 +10,8 @@ def self.included(base) end module ClassMethods + PAGED_COLLECTION_SIZE = 5_000 + ## # Allows for arbitrary find_by methods. For example: # Ontology.find_by_acronym("BRO") @@ -53,7 +55,12 @@ def collection_path # Get all resources from the base collection for a resource def all(*args) params = args.shift || {} - entry_point(@media_type, params) + request_params = collection_request_params(params) + response = entry_point(@media_type, request_params) + + return response if page_requested?(params) || !paged_collection?(response) + + all_pages(response, request_params) end ## @@ -113,6 +120,49 @@ def find_by(attrs, *args) bools.all? end end + + private + + def all_pages(first_page, params) + collection = Array(first_page.collection) + next_page = first_page.nextPage + + while next_page + page = entry_point(@media_type, next_page_params(params, next_page)) + collection.concat(Array(page.collection)) + next_page = page.nextPage + end + + collection + end + + def collection_request_params(params) + return params if page_requested?(params) || page_size_requested?(params) + + params.merge(pagesize: PAGED_COLLECTION_SIZE) + end + + def next_page_params(params, page) + page_params = params.dup + page_params[page_params.key?("page") ? 
"page" : :page] = page + page_params[:pagesize] = PAGED_COLLECTION_SIZE unless page_size_requested?(page_params) + page_params + end + + def page_requested?(params) + params.key?(:page) || params.key?("page") + end + + def page_size_requested?(params) + params.key?(:pagesize) || params.key?("pagesize") + end + + def paged_collection?(response) + response.respond_to?(:collection) && + response.respond_to?(:pageCount) && + response.respond_to?(:nextPage) && + response.collection.is_a?(Array) + end end end end diff --git a/lib/ontologies_api_client/models/user.rb b/lib/ontologies_api_client/models/user.rb index e0d9fbf..0742db7 100644 --- a/lib/ontologies_api_client/models/user.rb +++ b/lib/ontologies_api_client/models/user.rb @@ -9,7 +9,12 @@ class User < LinkedData::Client::Base include LinkedData::Client::ReadWrite @media_type = "http://data.bioontology.org/metadata/User" - @include_attrs = "all" + @include_attrs = "username,email,role,firstName,lastName,created" + + def self.all(*args) + params = args.shift || {} + super({ display_context: false, display_links: false }.merge(params), *args) + end def self.authenticate(user, password) auth_params = {user: user, password: password, include: "all"} diff --git a/test/models/test_collection.rb b/test/models/test_collection.rb index 03c5993..94e3d17 100644 --- a/test/models/test_collection.rb +++ b/test/models/test_collection.rb @@ -17,6 +17,46 @@ def test_all assert onts.length > 350 end + def test_all_flattens_paged_collections + calls = [] + pages = { + 1 => OpenStruct.new(collection: %w[a b], pageCount: 2, nextPage: 2), + 2 => OpenStruct.new(collection: %w[c], pageCount: 2, nextPage: nil) + } + + TestOntology.stub(:entry_point, ->(_media_type, params) { + calls << params + pages.fetch(params[:page] || 1) + }) do + assert_equal %w[a b c], TestOntology.all + end + + assert_equal [{ pagesize: 5_000 }, { pagesize: 5_000, page: 2 }], calls + end + + def test_all_returns_page_when_page_requested + requested_page = 
OpenStruct.new(collection: %w[a b], pageCount: 2, nextPage: 2) + + TestOntology.stub(:entry_point, ->(_media_type, _params) { requested_page }) do + assert_same requested_page, TestOntology.all(page: 1) + end + end + + def test_user_all_uses_lightweight_defaults + calls = [] + + LinkedData::Client::Models::User.stub(:entry_point, ->(_media_type, params) { + calls << params + [] + }) do + assert_equal [], LinkedData::Client::Models::User.all + end + + assert_equal "username,email,role,firstName,lastName,created", LinkedData::Client::Models::User.include_attrs + assert_equal false, calls.first[:display_context] + assert_equal false, calls.first[:display_links] + end + def test_class_for_type media_type = 'http://data.bioontology.org/metadata/Category' type_cls = LinkedData::Client::Base.class_for_type(media_type) From e4420117e7c58578324cd7ff821b4e61c3e69866 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Tue, 28 Apr 2026 16:57:11 -0700 Subject: [PATCH 4/5] add /users benchmark script for perf debugging Manual benchmark harness for the /users endpoint. Times two paths (raw Net::HTTP and Models::User.all) plus an optional --page=N --pagesize=M single-page mode for isolating per-page cost without the all-pages walk. Used to verify the ontologies_api + ontologies_linked_data#286 fixes against the production timeout issue. 
Not loaded by the test suite; invoked manually via: UT_APIKEY=<your-api-key> bundle exec ruby test/benchmark/users.rb --- test/benchmark/users.rb | 129 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 test/benchmark/users.rb diff --git a/test/benchmark/users.rb b/test/benchmark/users.rb new file mode 100644 index 0000000..283a2dc --- /dev/null +++ b/test/benchmark/users.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require 'benchmark' +require 'json' +require 'net/http' +require 'optparse' +require 'uri' + +$stdout.sync = true + +ENV['UT_APIKEY'] ||= 'manual-timing' + +require_relative '../../lib/ontologies_api_client' +require_relative '../../config/config' + +options = { pagesize: 5_000 } +OptionParser.new do |opts| + opts.banner = 'Usage: ruby test/benchmark/users.rb [--page=N] [--pagesize=M]' + opts.on('--page=N', Integer, 'Single-page mode: fetch one page (skips all-pages walk)') { |v| options[:page] = v } + opts.on('--pagesize=M', Integer, "Page size (default: #{options[:pagesize]})") { |v| options[:pagesize] = v } +end.parse! 
+ +def users_uri(params = {}) + uri = URI.join(LinkedData::Client.settings.rest_url.chomp('/') + '/', 'users') + uri.query = URI.encode_www_form(params) + uri +end + +def get_json(uri) + request = Net::HTTP::Get.new(uri) + request['Accept'] = 'application/json' + request['Authorization'] = "apikey token=#{LinkedData::Client.settings.apikey}" + request['User-Agent'] = "NCBO API Ruby Client v#{LinkedData::Client::VERSION}" + + Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http| + http.request(request) + end +end + +def summarize_json(body) + parsed = JSON.parse(body) + + if parsed.is_a?(Array) + { shape: 'array', count: parsed.length } + elsif parsed.is_a?(Hash) && parsed['collection'].is_a?(Array) + { + shape: 'page', + count: parsed['collection'].length, + page: parsed['page'], + page_count: parsed['pageCount'], + total_count: parsed['totalCount'], + next_page: parsed['nextPage'] + } + else + { shape: parsed.class.name } + end +rescue JSON::ParserError => e + { shape: 'unparseable', error: e.message } +end + +def time_raw(label, params) + uri = users_uri(params) + response = nil + + elapsed = Benchmark.realtime { response = get_json(uri) } + + summary = summarize_json(response.body) + puts "\n#{label}" + puts " uri: #{uri}" + puts " status: #{response.code}" + puts " elapsed: #{format('%.3f', elapsed)}s" + puts " bytes: #{response.body.bytesize}" + puts " summary: #{summary}" +end + +def time_raw_users_include_all + time_raw('Raw GET /users?include=all (no pagination params)', include: 'all') +end + +def time_raw_users_paged(page:, pagesize:) + time_raw("Raw GET /users (page=#{page}, pagesize=#{pagesize}, include=all)", + page: page, pagesize: pagesize, include: 'all') +end + +def time_user_all + users = nil + + elapsed = Benchmark.realtime do + users = LinkedData::Client::Models::User.all + end + + puts "\nLinkedData::Client::Models::User.all (auto-paginate, walks all pages)" + puts " elapsed: #{format('%.3f', elapsed)}s" + puts " 
class: #{users.class}" + puts " count: #{users.respond_to?(:length) ? users.length : 'n/a'}" + puts " first: #{users.first.username if users.respond_to?(:first) && users.first.respond_to?(:username)}" +end + +def time_user_single_page(page:, pagesize:) + result = nil + + elapsed = Benchmark.realtime do + result = LinkedData::Client::Models::User.all(page: page, pagesize: pagesize) + end + + puts "\nLinkedData::Client::Models::User.all(page: #{page}, pagesize: #{pagesize})" + puts " elapsed: #{format('%.3f', elapsed)}s" + puts " class: #{result.class}" + if result.respond_to?(:collection) + puts " page: #{result.page} / #{result.pageCount}" + puts " total_count: #{result.totalCount}" + puts " collection.length: #{result.collection.length}" + puts " next_page: #{result.nextPage.inspect}" + first = result.collection.first + puts " first: #{first.username if first.respond_to?(:username)}" + else + puts " (response was not paged) preview: #{result.inspect[0, 200]}" + end +end + +puts "REST URL: #{LinkedData::Client.settings.rest_url}" + +if options[:page] + time_raw_users_paged(page: options[:page], pagesize: options[:pagesize]) + time_user_single_page(page: options[:page], pagesize: options[:pagesize]) +else + time_raw_users_include_all + time_user_all +end From dad23b925e9b963f2b64872544ac3d4f884d2ac4 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Fri, 1 May 2026 11:27:48 -0700 Subject: [PATCH 5/5] add explicit Array-passthrough test for Collection#all MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per PR review feedback: the back-compat invariant for non-paginated endpoints (e.g. /ontologies, /groups, /categories today) is currently only exercised implicitly by the network-dependent `test_all`. If we ever paginate one of those endpoints, that test would silently switch to exercising the paged path instead, leaving the Array passthrough uncovered. 
Adds test_all_passes_through_array_response — stubs entry_point to return a flat Array and asserts Collection#all returns it unchanged. Also asserts pagesize: 5_000 is still injected on the request (the API simply ignores it on non-paginated endpoints). --- test/models/test_collection.rb | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/models/test_collection.rb b/test/models/test_collection.rb index e6a3f94..5da3a75 100644 --- a/test/models/test_collection.rb +++ b/test/models/test_collection.rb @@ -42,6 +42,30 @@ def test_all_returns_page_when_page_requested end end + # Back-compat invariant: when an endpoint returns a flat Array (i.e. it + # hasn't been paginated yet — e.g. /ontologies, /groups, /categories + # today), Collection#all must pass the response through unchanged. This + # is what keeps non-paginated endpoints working after this gem's + # auto-flatten change. Guards against future refactors of the page-walk + # logic accidentally breaking the non-paged path; complements the + # network-dependent `test_all` which would only catch the regression + # against a live API. + def test_all_passes_through_array_response + array_response = [OpenStruct.new(id: 'a'), OpenStruct.new(id: 'b'), OpenStruct.new(id: 'c')] + calls = [] + + TestOntology.stub(:entry_point, ->(_media_type, params) { + calls << params + array_response + }) do + assert_same array_response, TestOntology.all + end + + # Sanity: pagesize is still injected on the request — the API simply + # ignores it and returns an Array, which we hand back as-is. + assert_equal [{ pagesize: 5_000 }], calls + end + def test_user_all_uses_lightweight_defaults calls = []