From 1d11f371914c70213e7e846a040b936a23b8855d Mon Sep 17 00:00:00 2001 From: Anna Buianova Date: Wed, 12 Feb 2025 18:42:04 +0300 Subject: [PATCH 1/9] Moved out code to the service, added naive rspec test --- .rspec | 1 + Gemfile | 3 + Gemfile.lock | 34 ++++++++++ app/services/trips_importer.rb | 57 +++++++++++++++++ config/database.yml | 3 + db/schema.rb | 16 +++-- lib/tasks/utils.rake | 32 +--------- spec/rails_helper.rb | 70 +++++++++++++++++++++ spec/services/trips_importer_spec.rb | 9 +++ spec/spec_helper.rb | 94 ++++++++++++++++++++++++++++ 10 files changed, 280 insertions(+), 39 deletions(-) create mode 100644 .rspec create mode 100644 app/services/trips_importer.rb create mode 100644 spec/rails_helper.rb create mode 100644 spec/services/trips_importer_spec.rb create mode 100644 spec/spec_helper.rb diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..c99d2e73 --- /dev/null +++ b/.rspec @@ -0,0 +1 @@ +--require spec_helper diff --git a/Gemfile b/Gemfile index 34074dfd..3ad3f2cd 100644 --- a/Gemfile +++ b/Gemfile @@ -8,6 +8,9 @@ gem 'pg' gem 'puma' gem 'listen' gem 'bootsnap' +gem 'pry' +gem 'rspec' +gem 'rspec-rails' gem 'rack-mini-profiler' # Windows does not include zoneinfo files, so bundle the tzinfo-data gem diff --git a/Gemfile.lock b/Gemfile.lock index a9ddd818..085e5b10 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -78,13 +78,16 @@ GEM bootsnap (1.18.4) msgpack (~> 1.2) builder (3.3.0) + coderay (1.1.3) concurrent-ruby (1.3.5) connection_pool (2.5.0) crass (1.0.6) date (3.4.1) + diff-lcs (1.5.1) drb (2.2.1) erubi (1.13.1) ffi (1.17.1-arm64-darwin) + ffi (1.17.1-x86_64-linux-gnu) globalid (1.2.1) activesupport (>= 6.1) i18n (1.14.7) @@ -107,6 +110,7 @@ GEM net-pop net-smtp marcel (1.0.4) + method_source (1.1.0) mini_mime (1.1.5) minitest (5.25.4) msgpack (1.8.0) @@ -122,10 +126,15 @@ GEM nio4r (2.7.4) nokogiri (1.18.2-arm64-darwin) racc (~> 1.4) + nokogiri (1.18.2-x86_64-linux-gnu) + racc (~> 1.4) pg (1.5.9) pp (0.6.2) prettyprint 
prettyprint (0.2.0) + pry (0.15.2) + coderay (~> 1.1) + method_source (~> 1.0) psych (5.2.3) date stringio @@ -179,6 +188,27 @@ GEM psych (>= 4.0.0) reline (0.6.0) io-console (~> 0.5) + rspec (3.13.0) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) + rspec-core (3.13.3) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.3) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-rails (7.1.1) + actionpack (>= 7.0) + activesupport (>= 7.0) + railties (>= 7.0) + rspec-core (~> 3.13) + rspec-expectations (~> 3.13) + rspec-mocks (~> 3.13) + rspec-support (~> 3.13) + rspec-support (3.13.2) securerandom (0.4.1) stringio (3.1.2) thor (1.3.2) @@ -195,14 +225,18 @@ GEM PLATFORMS arm64-darwin-24 + x86_64-linux DEPENDENCIES bootsnap listen pg + pry puma rack-mini-profiler rails (~> 8.0.1) + rspec + rspec-rails tzinfo-data RUBY VERSION diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb new file mode 100644 index 00000000..f921de1b --- /dev/null +++ b/app/services/trips_importer.rb @@ -0,0 +1,57 @@ +class TripsImporter + def initialize(file = 'fixtures/small.json') + @file = file + end + + def self.call(...) 
+ new(...).call + end + + def call + json = JSON.parse(File.read(file)) + + ActiveRecord::Base.transaction do + City.delete_all + Bus.delete_all + Service.delete_all + Trip.delete_all + ActiveRecord::Base.connection.execute('delete from buses_services;') + + # TODO + # файл large довольно быстро читается на самом деле + # нужно написать тест сначала + # вынести в класс для этого будет удобно, я думаю + # bulk insert (cities, buses, services, trips) + # in batches надо, видимо + + # cities = Set.new + # trip_services = Set.new + # buses = [] + + json.each do |trip| + from = City.find_or_create_by(name: trip['from']) + to = City.find_or_create_by(name: trip['to']) + services = [] + trip['bus']['services'].each do |service| + s = Service.find_or_create_by(name: service) + services << s + end + bus = Bus.find_or_create_by(number: trip['bus']['number']) + bus.update(model: trip['bus']['model'], services: services) + + Trip.create!( + from: from, + to: to, + bus: bus, + start_time: trip['start_time'], + duration_minutes: trip['duration_minutes'], + price_cents: trip['price_cents'], + ) + end + end + end + + private + + attr_reader :file +end \ No newline at end of file diff --git a/config/database.yml b/config/database.yml index e116cfa6..5feec182 100644 --- a/config/database.yml +++ b/config/database.yml @@ -17,6 +17,9 @@ default: &default adapter: postgresql encoding: unicode + host: localhost + user: postgres + password: postgres # For details on connection pooling, see Rails configuration guide # http://guides.rubyonrails.org/configuring.html#database-pooling pool: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> diff --git a/db/schema.rb b/db/schema.rb index f6921e45..40c9c8ae 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -2,18 +2,17 @@ # of editing this file, please use the migrations feature of Active Record to # incrementally modify your database, and then regenerate this schema definition. 
# -# Note that this schema.rb definition is the authoritative source for your -# database schema. If you need to create the application database on another -# system, you should be using db:schema:load, not running all the migrations -# from scratch. The latter is a flawed and unsustainable approach (the more migrations -# you'll amass, the slower it'll run and the greater likelihood for issues). +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_03_30_193044) do - +ActiveRecord::Schema[8.0].define(version: 2019_03_30_193044) do # These are extensions that must be enabled in order to support this database - enable_extension "plpgsql" + enable_extension "pg_catalog.plpgsql" create_table "buses", force: :cascade do |t| t.string "number" @@ -41,5 +40,4 @@ t.integer "price_cents" t.integer "bus_id" end - end diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index 540fe871..a5d0794f 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -1,34 +1,6 @@ # Наивная загрузка данных из json-файла в БД # rake reload_json[fixtures/small.json] -task :reload_json, [:file_name] => :environment do |_task, args| - json = JSON.parse(File.read(args.file_name)) - - ActiveRecord::Base.transaction do - City.delete_all - Bus.delete_all - Service.delete_all - Trip.delete_all - ActiveRecord::Base.connection.execute('delete from buses_services;') - json.each do |trip| - from = City.find_or_create_by(name: trip['from']) - to = City.find_or_create_by(name: trip['to']) - services = [] - trip['bus']['services'].each do 
|service| - s = Service.find_or_create_by(name: service) - services << s - end - bus = Bus.find_or_create_by(number: trip['bus']['number']) - bus.update(model: trip['bus']['model'], services: services) - - Trip.create!( - from: from, - to: to, - bus: bus, - start_time: trip['start_time'], - duration_minutes: trip['duration_minutes'], - price_cents: trip['price_cents'], - ) - end - end +task :reload_json, [:file_name] => :environment do |_task, args| + TripsImporter.call(args.file_name) end diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb new file mode 100644 index 00000000..cb2ba553 --- /dev/null +++ b/spec/rails_helper.rb @@ -0,0 +1,70 @@ +# This file is copied to spec/ when you run 'rails generate rspec:install' +require 'spec_helper' +ENV['RAILS_ENV'] ||= 'test' +require_relative '../config/environment' +# Prevent database truncation if the environment is production +abort("The Rails environment is running in production mode!") if Rails.env.production? +# Uncomment the line below in case you have `--require rails_helper` in the `.rspec` file +# that will avoid rails generators crashing because migrations haven't been run yet +# return unless Rails.env.test? +require 'rspec/rails' +# Add additional requires below this line. Rails is not loaded until this point! + +# Requires supporting ruby files with custom matchers and macros, etc, in +# spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are +# run as spec files by default. This means that files in spec/support that end +# in _spec.rb will both be required and run as specs, causing the specs to be +# run twice. It is recommended that you do not name files matching this glob to +# end with _spec.rb. You can configure this pattern with the --pattern +# option on the command line or in ~/.rspec, .rspec or `.rspec-local`. +# +# The following line is provided for convenience purposes. 
It has the downside +# of increasing the boot-up time by auto-requiring all files in the support +# directory. Alternatively, in the individual `*_spec.rb` files, manually +# require only the support files necessary. +# +# Rails.root.glob('spec/support/**/*.rb').sort_by(&:to_s).each { |f| require f } + +# Checks for pending migrations and applies them before tests are run. +# If you are not using ActiveRecord, you can remove these lines. +begin + ActiveRecord::Migration.maintain_test_schema! +rescue ActiveRecord::PendingMigrationError => e + abort e.to_s.strip +end +RSpec.configure do |config| + # Remove this line if you're not using ActiveRecord or ActiveRecord fixtures + config.fixture_paths = [ + Rails.root.join('spec/fixtures') + ] + + # If you're not using ActiveRecord, or you'd prefer not to run each of your + # examples within a transaction, remove the following line or assign false + # instead of true. + config.use_transactional_fixtures = true + + # You can uncomment this line to turn off ActiveRecord support entirely. + # config.use_active_record = false + + # RSpec Rails uses metadata to mix in different behaviours to your tests, + # for example enabling you to call `get` and `post` in request specs. e.g.: + # + # RSpec.describe UsersController, type: :request do + # # ... + # end + # + # The different available types are documented in the features, such as in + # https://rspec.info/features/7-1/rspec-rails + # + # You can also this infer these behaviours automatically by location, e.g. + # /spec/models would pull in the same behaviour as `type: :model` but this + # behaviour is considered legacy and will be removed in a future version. + # + # To enable this behaviour uncomment the line below. + # config.infer_spec_type_from_file_location! + + # Filter lines from Rails gems in backtraces. + config.filter_rails_from_backtrace! 
+ # arbitrary gems may also be filtered via: + # config.filter_gems_from_backtrace("gem name") +end diff --git a/spec/services/trips_importer_spec.rb b/spec/services/trips_importer_spec.rb new file mode 100644 index 00000000..3b27fc60 --- /dev/null +++ b/spec/services/trips_importer_spec.rb @@ -0,0 +1,9 @@ +require "rails_helper" + +RSpec.describe TripsImporter, type: :service do + it "imports cities and trips" do + expect do + described_class.call(Rails.root.join("fixtures/example.json")) + end.to change(City, :count).by(2).and change(Service, :count).by(2).and change(Bus, :count).by(1).and change(Trip, :count).by(10) + end +end \ No newline at end of file diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000..327b58ea --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,94 @@ +# This file was generated by the `rails generate rspec:install` command. Conventionally, all +# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. +# The generated `.rspec` file contains `--require spec_helper` which will cause +# this file to always be loaded, without a need to explicitly require it in any +# files. +# +# Given that it is always loaded, you are encouraged to keep this file as +# light-weight as possible. Requiring heavyweight dependencies from this file +# will add to the boot time of your test suite on EVERY test run, even for an +# individual file that may not need all of that loaded. Instead, consider making +# a separate helper file that requires the additional dependencies and performs +# the additional setup, and require it from the spec files that actually need +# it. +# +# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration +RSpec.configure do |config| + # rspec-expectations config goes here. You can use an alternate + # assertion/expectation library such as wrong or the stdlib/minitest + # assertions if you prefer. 
+ config.expect_with :rspec do |expectations| + # This option will default to `true` in RSpec 4. It makes the `description` + # and `failure_message` of custom matchers include text for helper methods + # defined using `chain`, e.g.: + # be_bigger_than(2).and_smaller_than(4).description + # # => "be bigger than 2 and smaller than 4" + # ...rather than: + # # => "be bigger than 2" + expectations.include_chain_clauses_in_custom_matcher_descriptions = true + end + + # rspec-mocks config goes here. You can use an alternate test double + # library (such as bogus or mocha) by changing the `mock_with` option here. + config.mock_with :rspec do |mocks| + # Prevents you from mocking or stubbing a method that does not exist on + # a real object. This is generally recommended, and will default to + # `true` in RSpec 4. + mocks.verify_partial_doubles = true + end + + # This option will default to `:apply_to_host_groups` in RSpec 4 (and will + # have no way to turn it off -- the option exists only for backwards + # compatibility in RSpec 3). It causes shared context metadata to be + # inherited by the metadata hash of host groups and examples, rather than + # triggering implicit auto-inclusion in groups with matching metadata. + config.shared_context_metadata_behavior = :apply_to_host_groups + +# The settings below are suggested to provide a good initial experience +# with RSpec, but feel free to customize to your heart's content. +=begin + # This allows you to limit a spec run to individual examples or groups + # you care about by tagging them with `:focus` metadata. When nothing + # is tagged with `:focus`, all examples get run. RSpec also provides + # aliases for `it`, `describe`, and `context` that include `:focus` + # metadata: `fit`, `fdescribe` and `fcontext`, respectively. + config.filter_run_when_matching :focus + + # Allows RSpec to persist some state between runs in order to support + # the `--only-failures` and `--next-failure` CLI options. 
We recommend + # you configure your source control system to ignore this file. + config.example_status_persistence_file_path = "spec/examples.txt" + + # Limits the available syntax to the non-monkey patched syntax that is + # recommended. For more details, see: + # https://rspec.info/features/3-12/rspec-core/configuration/zero-monkey-patching-mode/ + config.disable_monkey_patching! + + # Many RSpec users commonly either run the entire suite or an individual + # file, and it's useful to allow more verbose output when running an + # individual spec file. + if config.files_to_run.one? + # Use the documentation formatter for detailed output, + # unless a formatter has already been configured + # (e.g. via a command-line flag). + config.default_formatter = "doc" + end + + # Print the 10 slowest examples and example groups at the + # end of the spec run, to help surface which specs are running + # particularly slow. + config.profile_examples = 10 + + # Run specs in random order to surface order dependencies. If you find an + # order dependency and want to debug it, you can fix the order by providing + # the seed, which is printed after each run. + # --seed 1234 + config.order = :random + + # Seed global randomization in this process using the `--seed` CLI option. + # Setting this allows you to use `--seed` to deterministically reproduce + # test failures related to randomization by passing the same `--seed` value + # as the one that triggered the failure. 
+ Kernel.srand config.seed +=end +end From 81c9eb3c4b58c11c0c95bc6c50c25f6a0b5fb4ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=BD=D0=B0=20=D0=91=D1=83=D1=8F=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0?= Date: Fri, 14 Feb 2025 19:40:42 +0300 Subject: [PATCH 2/9] Added unique index to services + upsert services --- Gemfile | 2 ++ Gemfile.lock | 5 ++++ app/services/trips_importer.rb | 29 ++++++++++--------- case_study.md | 26 +++++++++++++++++ ...0214163423_add_unique_index_to_services.rb | 7 +++++ db/schema.rb | 3 +- lib/tasks/utils.rake | 19 +++++++++++- 7 files changed, 76 insertions(+), 15 deletions(-) create mode 100644 case_study.md create mode 100644 db/migrate/20250214163423_add_unique_index_to_services.rb diff --git a/Gemfile b/Gemfile index 3ad3f2cd..3cc15cb9 100644 --- a/Gemfile +++ b/Gemfile @@ -12,6 +12,8 @@ gem 'pry' gem 'rspec' gem 'rspec-rails' gem 'rack-mini-profiler' +gem 'ruby-prof' +gem 'strong_migrations' # Windows does not include zoneinfo files, so bundle the tzinfo-data gem gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] diff --git a/Gemfile.lock b/Gemfile.lock index 085e5b10..44ba277d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -209,8 +209,11 @@ GEM rspec-mocks (~> 3.13) rspec-support (~> 3.13) rspec-support (3.13.2) + ruby-prof (1.7.1) securerandom (0.4.1) stringio (3.1.2) + strong_migrations (2.2.0) + activerecord (>= 7) thor (1.3.2) timeout (0.4.3) tzinfo (2.0.6) @@ -237,6 +240,8 @@ DEPENDENCIES rails (~> 8.0.1) rspec rspec-rails + ruby-prof + strong_migrations tzinfo-data RUBY VERSION diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb index f921de1b..74562424 100644 --- a/app/services/trips_importer.rb +++ b/app/services/trips_importer.rb @@ -10,13 +10,9 @@ def self.call(...) 
def call json = JSON.parse(File.read(file)) - ActiveRecord::Base.transaction do - City.delete_all - Bus.delete_all - Service.delete_all - Trip.delete_all - ActiveRecord::Base.connection.execute('delete from buses_services;') + clean_database + ActiveRecord::Base.transaction do # TODO # файл large довольно быстро читается на самом деле # нужно написать тест сначала @@ -31,13 +27,12 @@ def call json.each do |trip| from = City.find_or_create_by(name: trip['from']) to = City.find_or_create_by(name: trip['to']) - services = [] - trip['bus']['services'].each do |service| - s = Service.find_or_create_by(name: service) - services << s - end + + service_names = trip['bus']['services'].map { |name| { name: name } } + service_ids = Service.upsert_all(service_names, unique_by: :name) + bus = Bus.find_or_create_by(number: trip['bus']['number']) - bus.update(model: trip['bus']['model'], services: services) + bus.update(model: trip['bus']['model'], service_ids: service_ids.rows.flatten) Trip.create!( from: from, @@ -54,4 +49,12 @@ def call private attr_reader :file -end \ No newline at end of file + + def clean_database + City.delete_all + Bus.delete_all + Service.delete_all + Trip.delete_all + ActiveRecord::Base.connection.execute('delete from buses_services;') + end +end diff --git a/case_study.md b/case_study.md new file mode 100644 index 00000000..04275837 --- /dev/null +++ b/case_study.md @@ -0,0 +1,26 @@ + + +Изначально: + +15 сек + +Переписала trips на import: + +small.json - 18 сек + +не пошло - откатила + +--------------- +Переписала services на upsert и добавила уникальный индекс на name + +```ruby +service_names = trip['bus']['services'].map { |name| { name: name } } +service_ids = Service.upsert_all(service_names, unique_by: :name) + +bus = Bus.find_or_create_by(number: trip['bus']['number']) +bus.update(model: trip['bus']['model'], service_ids: service_ids.rows.flatten) +``` + +Теперь на small - 6 сек +----------------- +Теперь автобусы, добавим уникальный индекс: 
diff --git a/db/migrate/20250214163423_add_unique_index_to_services.rb b/db/migrate/20250214163423_add_unique_index_to_services.rb new file mode 100644 index 00000000..b4623761 --- /dev/null +++ b/db/migrate/20250214163423_add_unique_index_to_services.rb @@ -0,0 +1,7 @@ +class AddUniqueIndexToServices < ActiveRecord::Migration[8.0] + disable_ddl_transaction! + + def change + add_index :services, :name, unique: true, algorithm: :concurrently + end +end diff --git a/db/schema.rb b/db/schema.rb index 40c9c8ae..3e9fafa5 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2019_03_30_193044) do +ActiveRecord::Schema[8.0].define(version: 2025_02_14_163423) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" @@ -30,6 +30,7 @@ create_table "services", force: :cascade do |t| t.string "name" + t.index ["name"], name: "index_services_on_name", unique: true end create_table "trips", force: :cascade do |t| diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index a5d0794f..d30178b3 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -2,5 +2,22 @@ # rake reload_json[fixtures/small.json] task :reload_json, [:file_name] => :environment do |_task, args| - TripsImporter.call(args.file_name) + + require 'ruby-prof' + start_time = Time.current + + # GC.disable + # RubyProf.measure_mode = RubyProf::WALL_TIME + + + # result = RubyProf::Profile.profile do + TripsImporter.call(args.file_name) + # end + + # printer = RubyProf::CallStackPrinter.new(result) + # printer.print(File.open('ruby_prof_reports/callstack.html', 'w+')) + + end_time = Time.current + + p end_time - start_time end From d78d8311598c611e72b97bda4c1d007e1011d361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=BD=D0=B0=20=D0=91=D1=83=D1=8F=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0?= Date: Fri, 14 
Feb 2025 20:09:35 +0300 Subject: [PATCH 3/9] Kind of optimization --- app/services/trips_importer.rb | 39 ++++++++++++------- case_study.md | 12 +++++- ...4164236_add_unique_index_to_bus_numbers.rb | 7 ++++ db/schema.rb | 3 +- 4 files changed, 46 insertions(+), 15 deletions(-) create mode 100644 db/migrate/20250214164236_add_unique_index_to_bus_numbers.rb diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb index 74562424..042bd8fd 100644 --- a/app/services/trips_importer.rb +++ b/app/services/trips_importer.rb @@ -15,14 +15,15 @@ def call ActiveRecord::Base.transaction do # TODO # файл large довольно быстро читается на самом деле - # нужно написать тест сначала - # вынести в класс для этого будет удобно, я думаю - # bulk insert (cities, buses, services, trips) - # in batches надо, видимо - # cities = Set.new - # trip_services = Set.new - # buses = [] + + # unique index + insert cities + # bulk insert типа + # insert buses_services + test + # + # bus = Bus.create!(model: "Икарус", number: "100") + + json.each do |trip| from = City.find_or_create_by(name: trip['from']) @@ -31,16 +32,28 @@ def call service_names = trip['bus']['services'].map { |name| { name: name } } service_ids = Service.upsert_all(service_names, unique_by: :name) - bus = Bus.find_or_create_by(number: trip['bus']['number']) - bus.update(model: trip['bus']['model'], service_ids: service_ids.rows.flatten) + number = trip['bus']['number'] + + bus = Bus.upsert_all([number: number, model: trip['bus']['model']], unique_by: :number, on_duplicate: :update) + # bus = Bus.find(bus.first["id"]) + # bus.update(service_ids: service_ids.rows.flatten) + + # TODO!!! 
+ test + # # "insert into buses_services(bus_id, service_id) values (?)", + + bus_id = bus.first["id"] + # binding.pry + # bus = Bus.find_or_create_by(number: number) + # bus.update(model: trip['bus']['model'], service_ids: service_ids.rows.flatten) - Trip.create!( - from: from, - to: to, - bus: bus, + Trip.insert_all([{ + from_id: from.id, + to_id: to.id, + bus_id: bus_id, start_time: trip['start_time'], duration_minutes: trip['duration_minutes'], price_cents: trip['price_cents'], + }] ) end end diff --git a/case_study.md b/case_study.md index 04275837..5a4b9323 100644 --- a/case_study.md +++ b/case_study.md @@ -22,5 +22,15 @@ bus.update(model: trip['bus']['model'], service_ids: service_ids.rows.flatten) ``` Теперь на small - 6 сек +medium - 54сек ----------------- -Теперь автобусы, добавим уникальный индекс: +Теперь автобусы, добавим уникальный индекс на number: + + + # bus = Bus.upsert_all([number: number, model: trip['bus']['model']], unique_by: :number, on_duplicate: :update) + # bus = Bus.find(bus.first["id"]) + # bus.update(service_ids: service_ids.rows.flatten) + # # "insert into buses_services(bus_id, service_id) values (?)", + + # bus.first["id"] + # binding.pry \ No newline at end of file diff --git a/db/migrate/20250214164236_add_unique_index_to_bus_numbers.rb b/db/migrate/20250214164236_add_unique_index_to_bus_numbers.rb new file mode 100644 index 00000000..929a49a7 --- /dev/null +++ b/db/migrate/20250214164236_add_unique_index_to_bus_numbers.rb @@ -0,0 +1,7 @@ +class AddUniqueIndexToBusNumbers < ActiveRecord::Migration[8.0] + disable_ddl_transaction! + + def change + add_index :buses, :number, unique: true, algorithm: :concurrently + end +end diff --git a/db/schema.rb b/db/schema.rb index 3e9fafa5..7cbd3ae7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,13 +10,14 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[8.0].define(version: 2025_02_14_163423) do +ActiveRecord::Schema[8.0].define(version: 2025_02_14_164236) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" create_table "buses", force: :cascade do |t| t.string "number" t.string "model" + t.index ["number"], name: "index_buses_on_number", unique: true end create_table "buses_services", force: :cascade do |t| From 5567afd2438863ff3c9f47afecc330e0327b3972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=BD=D0=B0=20=D0=91=D1=83=D1=8F=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0?= Date: Fri, 14 Feb 2025 20:49:28 +0300 Subject: [PATCH 4/9] Bulk insert trips --- app/services/trips_importer.rb | 24 +++++++++++++++--------- lib/tasks/utils.rake | 4 ++-- spec/services/trips_importer_spec.rb | 4 +++- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb index 042bd8fd..ef6831f9 100644 --- a/app/services/trips_importer.rb +++ b/app/services/trips_importer.rb @@ -22,40 +22,46 @@ def call # insert buses_services + test # # bus = Bus.create!(model: "Икарус", number: "100") - - - + trips = [] json.each do |trip| from = City.find_or_create_by(name: trip['from']) to = City.find_or_create_by(name: trip['to']) service_names = trip['bus']['services'].map { |name| { name: name } } - service_ids = Service.upsert_all(service_names, unique_by: :name) + service_ids = Service.upsert_all(service_names, unique_by: :name).rows.flatten number = trip['bus']['number'] bus = Bus.upsert_all([number: number, model: trip['bus']['model']], unique_by: :number, on_duplicate: :update) # bus = Bus.find(bus.first["id"]) # bus.update(service_ids: service_ids.rows.flatten) + bus_id = bus.first["id"] # TODO!!! + test - # # "insert into buses_services(bus_id, service_id) values (?)", + # # "insert into buses_services(bus_id, service_id) values (?, ?)", + + if service_ids.present? 
+ values = service_ids.map { |service_id| "(#{bus_id}, #{service_id})" }.join(", ") + # binding.pry + sql = "INSERT INTO buses_services (bus_id, service_id) VALUES #{values};" + ActiveRecord::Base.connection.execute(sql) + end - bus_id = bus.first["id"] # binding.pry # bus = Bus.find_or_create_by(number: number) # bus.update(model: trip['bus']['model'], service_ids: service_ids.rows.flatten) - Trip.insert_all([{ + trips.push({ from_id: from.id, to_id: to.id, bus_id: bus_id, start_time: trip['start_time'], duration_minutes: trip['duration_minutes'], price_cents: trip['price_cents'], - }] - ) + }) end + + Trip.insert_all trips end end diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index d30178b3..5d9d2525 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -2,8 +2,8 @@ # rake reload_json[fixtures/small.json] task :reload_json, [:file_name] => :environment do |_task, args| - - require 'ruby-prof' + # ActiveRecord::Base.logger = Logger.new STDOUT + # require 'ruby-prof' start_time = Time.current # GC.disable diff --git a/spec/services/trips_importer_spec.rb b/spec/services/trips_importer_spec.rb index 3b27fc60..08fb997f 100644 --- a/spec/services/trips_importer_spec.rb +++ b/spec/services/trips_importer_spec.rb @@ -5,5 +5,7 @@ expect do described_class.call(Rails.root.join("fixtures/example.json")) end.to change(City, :count).by(2).and change(Service, :count).by(2).and change(Bus, :count).by(1).and change(Trip, :count).by(10) + + expect(Bus.last.services.count).to eq(2) end -end \ No newline at end of file +end From 12be987c0e4cd8edd53e502e69efcd335c21f2ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=BD=D0=B0=20=D0=91=D1=83=D1=8F=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0?= Date: Fri, 14 Feb 2025 21:53:28 +0300 Subject: [PATCH 5/9] Unique index to cities + rewrote importer --- app/services/trips_importer.rb | 79 +++++++++++-------- ...250214175040_add_unique_index_to_cities.rb | 7 ++ db/schema.rb | 3 +- lib/tasks/utils.rake | 15 +--- 4 files 
changed, 60 insertions(+), 44 deletions(-) create mode 100644 db/migrate/20250214175040_add_unique_index_to_cities.rb diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb index ef6831f9..9f05e7ce 100644 --- a/app/services/trips_importer.rb +++ b/app/services/trips_importer.rb @@ -10,50 +10,55 @@ def self.call(...) def call json = JSON.parse(File.read(file)) + ActiveRecord::Base.logger = nil + clean_database ActiveRecord::Base.transaction do - # TODO - # файл large довольно быстро читается на самом деле - + city_names = Set.new + service_names = Set.new + buses = {} - # unique index + insert cities - # bulk insert типа - # insert buses_services + test - # - # bus = Bus.create!(model: "Икарус", number: "100") - trips = [] + # первый проход - собираем "справочные" данные - города, услуги, автобусы + # собираем в Set или хэш, так чтобы удобно было вставлять в бд json.each do |trip| - from = City.find_or_create_by(name: trip['from']) - to = City.find_or_create_by(name: trip['to']) + city_names.add trip['from'] + city_names.add trip['to'] - service_names = trip['bus']['services'].map { |name| { name: name } } - service_ids = Service.upsert_all(service_names, unique_by: :name).rows.flatten + service_names.merge trip['bus']['services'] + buses[trip['bus']['number']] = trip['bus']['model'] + end - number = trip['bus']['number'] + # вставляем справочное + City.insert_all city_names.map { |name| { name: name } } + Service.insert_all service_names.map { |name| { name: name } } + Bus.insert_all buses.map { |number, model| { number: number, model: model }} - bus = Bus.upsert_all([number: number, model: trip['bus']['model']], unique_by: :number, on_duplicate: :update) - # bus = Bus.find(bus.first["id"]) - # bus.update(service_ids: service_ids.rows.flatten) - bus_id = bus.first["id"] + # формируем хэши, чтобы удобно получить доступ к id при втором проходе + cities = City.all.each_with_object({}) { |city, hash| hash[city.name] = city.id } + services = 
Service.all.each_with_object({}) { |service, hash| hash[service.name] = service.id } + buses = Bus.all.each_with_object({}) { |bus, hash| hash[bus.number] = bus.id } - # TODO!!! + test - # # "insert into buses_services(bus_id, service_id) values (?, ?)", + # тут соберём пары автобус-услуга + buses_services = Set.new - if service_ids.present? - values = service_ids.map { |service_id| "(#{bus_id}, #{service_id})" }.join(", ") - # binding.pry - sql = "INSERT INTO buses_services (bus_id, service_id) VALUES #{values};" - ActiveRecord::Base.connection.execute(sql) - end + # тут данные по поездкам для вставки + trips = [] - # binding.pry - # bus = Bus.find_or_create_by(number: number) - # bus.update(model: trip['bus']['model'], service_ids: service_ids.rows.flatten) + json.each do |trip| + from_id = cities[trip['from']] + to_id = cities[trip['to']] + bus_id = buses[trip['bus']['number']] + + # заполняем пары автобус-услуга + service_ids = services.values_at(*trip['bus']['services']) + service_ids.each do |service_id| + buses_services.add([bus_id, service_id]) + end trips.push({ - from_id: from.id, - to_id: to.id, + from_id: from_id, + to_id: to_id, bus_id: bus_id, start_time: trip['start_time'], duration_minutes: trip['duration_minutes'], @@ -61,7 +66,19 @@ def call }) end + # вставка данных о поездках + # пока не стала батчить, т.к. и так довольно быстро происходит Trip.insert_all trips + + # тоже uniqueness надо добавить + # вот такой нелегальный insert buses_services + # вообще чаще используем has_and_belongs_to_many , потому что часто в связке потом нужны доп. данные и таймстемпы, и свой id + # тогда можно было бы insert использовать + if buses_services.present? 
+ values = buses_services.map { |arr| "(#{arr[0]}, #{arr[1]})" }.join(", ") + sql = "INSERT INTO buses_services (bus_id, service_id) VALUES #{values};" + ActiveRecord::Base.connection.execute(sql) + end end end diff --git a/db/migrate/20250214175040_add_unique_index_to_cities.rb b/db/migrate/20250214175040_add_unique_index_to_cities.rb new file mode 100644 index 00000000..850e3007 --- /dev/null +++ b/db/migrate/20250214175040_add_unique_index_to_cities.rb @@ -0,0 +1,7 @@ +class AddUniqueIndexToCities < ActiveRecord::Migration[8.0] + disable_ddl_transaction! + + def change + add_index :cities, :name, unique: true, algorithm: :concurrently + end +end diff --git a/db/schema.rb b/db/schema.rb index 7cbd3ae7..55db5cc2 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2025_02_14_164236) do +ActiveRecord::Schema[8.0].define(version: 2025_02_14_175040) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" @@ -27,6 +27,7 @@ create_table "cities", force: :cascade do |t| t.string "name" + t.index ["name"], name: "index_cities_on_name", unique: true end create_table "services", force: :cascade do |t| diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index 5d9d2525..fb78517e 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -2,20 +2,11 @@ # rake reload_json[fixtures/small.json] task :reload_json, [:file_name] => :environment do |_task, args| - # ActiveRecord::Base.logger = Logger.new STDOUT - # require 'ruby-prof' - start_time = Time.current - - # GC.disable - # RubyProf.measure_mode = RubyProf::WALL_TIME + ActiveRecord::Base.logger = nil + start_time = Time.current - # result = RubyProf::Profile.profile do - TripsImporter.call(args.file_name) - # end - - # printer = RubyProf::CallStackPrinter.new(result) - # 
printer.print(File.open('ruby_prof_reports/callstack.html', 'w+')) + TripsImporter.call(args.file_name) end_time = Time.current From 6bbf6bddd14be65676a73f4a57fb56b5659a3871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=BD=D0=B0=20=D0=91=D1=83=D1=8F=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0?= Date: Fri, 14 Feb 2025 22:33:33 +0300 Subject: [PATCH 6/9] Added unique index to buses_services --- app/services/trips_importer.rb | 2 -- .../20250214185429_add_unique_index_to_buses_services.rb | 7 +++++++ db/schema.rb | 3 ++- lib/tasks/utils.rake | 2 -- 4 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 db/migrate/20250214185429_add_unique_index_to_buses_services.rb diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb index 9f05e7ce..73e17014 100644 --- a/app/services/trips_importer.rb +++ b/app/services/trips_importer.rb @@ -10,8 +10,6 @@ def self.call(...) def call json = JSON.parse(File.read(file)) - ActiveRecord::Base.logger = nil - clean_database ActiveRecord::Base.transaction do diff --git a/db/migrate/20250214185429_add_unique_index_to_buses_services.rb b/db/migrate/20250214185429_add_unique_index_to_buses_services.rb new file mode 100644 index 00000000..f759751a --- /dev/null +++ b/db/migrate/20250214185429_add_unique_index_to_buses_services.rb @@ -0,0 +1,7 @@ +class AddUniqueIndexToBusesServices < ActiveRecord::Migration[8.0] + disable_ddl_transaction! + + def change + add_index :buses_services, [:bus_id, :service_id], unique: true, algorithm: :concurrently + end +end diff --git a/db/schema.rb b/db/schema.rb index 55db5cc2..fe1390a2 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[8.0].define(version: 2025_02_14_175040) do +ActiveRecord::Schema[8.0].define(version: 2025_02_14_185429) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" @@ -23,6 +23,7 @@ create_table "buses_services", force: :cascade do |t| t.integer "bus_id" t.integer "service_id" + t.index ["bus_id", "service_id"], name: "index_buses_services_on_bus_id_and_service_id", unique: true end create_table "cities", force: :cascade do |t| diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index fb78517e..beb3b4f2 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -2,8 +2,6 @@ # rake reload_json[fixtures/small.json] task :reload_json, [:file_name] => :environment do |_task, args| - ActiveRecord::Base.logger = nil - start_time = Time.current TripsImporter.call(args.file_name) From f1cb57f7da4ce3365534e806bceb16d7bd654a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=BD=D0=B0=20=D0=91=D1=83=D1=8F=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0?= Date: Fri, 14 Feb 2025 22:33:46 +0300 Subject: [PATCH 7/9] Case study, task A (import optimization) --- case_study.md | 54 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/case_study.md b/case_study.md index 5a4b9323..d44b6476 100644 --- a/case_study.md +++ b/case_study.md @@ -1,36 +1,46 @@ +## A. Импорт данных +Метрика: +- изначально замерила время на small.json - было около 15 секунд +- далее меряла small.json -> medium.json -> large.json -Изначально: +### Подготовка -15 сек +Сначала вынесла код в сервис-обжект (PORO) `TripsImporter` для удобства + написала базовый тест. +Далее стала смотреть, как оптимизировать. 
-Переписала trips на import: +### Оптимизация - исследование -small.json - 18 сек +Сначала попробовала поэтапно идти: -не пошло - откатила +- переписала на один insert trips, не трогая другие части; стало ещё медленнее - откатила пока +- добавляла уникальные индексы на справочные данные + использовала upsert, но во-первых не так уж сильно оптимизировало, во-вторых оказалось, что `upsert` не возвращает id, если нет вставки. Тест не отловил, т.к. там одна поездка. Дописывать тест не стала (поленилась), но в реальном приложении нужно. +- попробовала рубипрофом попрофилировать, но такое себе - всё размазано, и так видно, что 100500 запросов идёт +- попробовала проверить, какая часть занимает много времени, комментируя куски кода и запуская, в принципе довольно наглядно. Понятно, что insert trips и sessions долго работает (ожидаемо) +- также смотрела рельсовые логи, видно, что также идёт 100500 мелких запросов +- также померяла, что сама загрузка json занимает не так много времени, поэтому уж пару раз можно загрузить-пройтись ---------------- -Переписала services на upsert и добавила уникальный индекс на name +### Оптимизация -```ruby -service_names = trip['bus']['services'].map { |name| { name: name } } -service_ids = Service.upsert_all(service_names, unique_by: :name) +Решила сначала собрать справочные данные по городам, автобусам и тд, потом вставить справочные данные и запросить получившиеся id из бд и сформировать подходящие структуры данных для дальнейшего поиска при подготовке данных для trips. Попутно добавила уникальные индексы на `name` и тд. + +Далее пройтись ещё раз, подготовить данные по поездкам и услугам автобусов, и уже вставить их отдельно. + +Эта оптимизация была эффективной - файл large стал обрабатываться за ~ 3,35 секунды. 
-bus = Bus.find_or_create_by(number: trip['bus']['number']) -bus.update(model: trip['bus']['model'], service_ids: service_ids.rows.flatten) +``` +anna@vivosaurus:~/apps/rails-optimization-task3$ be rake reload_json[fixtures/large.json] +3.356575947 ``` -Теперь на small - 6 сек -medium - 54сек ------------------ -Теперь автобусы, добавим уникальный индекс на number: +```ruby +task :reload_json, [:file_name] => :environment do |_task, args| + start_time = Time.current + TripsImporter.call(args.file_name) - # bus = Bus.upsert_all([number: number, model: trip['bus']['model']], unique_by: :number, on_duplicate: :update) - # bus = Bus.find(bus.first["id"]) - # bus.update(service_ids: service_ids.rows.flatten) - # # "insert into buses_services(bus_id, service_id) values (?)", + end_time = Time.current - # bus.first["id"] - # binding.pry \ No newline at end of file + p end_time - start_time +end +``` \ No newline at end of file From 82fc9a1bc4594b2d305e40d182bd4209890d5b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=BD=D0=B0=20=D0=91=D1=83=D1=8F=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0?= Date: Fri, 14 Feb 2025 23:15:00 +0300 Subject: [PATCH 8/9] Simple optimizations for trips rendering --- app/controllers/trips_controller.rb | 2 +- app/views/trips/_delimiter.html.erb | 1 - app/views/trips/_service.html.erb | 1 - app/views/trips/_services.html.erb | 2 +- app/views/trips/index.html.erb | 2 +- case_study.md | 28 +++++++++++++++++++++++++++- 6 files changed, 30 insertions(+), 6 deletions(-) delete mode 100644 app/views/trips/_delimiter.html.erb delete mode 100644 app/views/trips/_service.html.erb diff --git a/app/controllers/trips_controller.rb b/app/controllers/trips_controller.rb index acb38be2..fa03e01a 100644 --- a/app/controllers/trips_controller.rb +++ b/app/controllers/trips_controller.rb @@ -2,6 +2,6 @@ class TripsController < ApplicationController def index @from = City.find_by_name!(params[:from]) @to = City.find_by_name!(params[:to]) - @trips = 
Trip.where(from: @from, to: @to).order(:start_time) + @trips = Trip.where(from: @from, to: @to).includes(bus: :services).order(:start_time) end end diff --git a/app/views/trips/_delimiter.html.erb b/app/views/trips/_delimiter.html.erb deleted file mode 100644 index 3f845ad0..00000000 --- a/app/views/trips/_delimiter.html.erb +++ /dev/null @@ -1 +0,0 @@ -==================================================== diff --git a/app/views/trips/_service.html.erb b/app/views/trips/_service.html.erb deleted file mode 100644 index 178ea8c0..00000000 --- a/app/views/trips/_service.html.erb +++ /dev/null @@ -1 +0,0 @@ -
  • <%= "#{service.name}" %>
  • diff --git a/app/views/trips/_services.html.erb b/app/views/trips/_services.html.erb index 2de639fc..0be503e1 100644 --- a/app/views/trips/_services.html.erb +++ b/app/views/trips/_services.html.erb @@ -1,6 +1,6 @@
  • Сервисы в автобусе:
    • <% services.each do |service| %> - <%= render "service", service: service %> +
    • <%= "#{service.name}" %>
    • <% end %>
    diff --git a/app/views/trips/index.html.erb b/app/views/trips/index.html.erb index a60bce41..3be71bce 100644 --- a/app/views/trips/index.html.erb +++ b/app/views/trips/index.html.erb @@ -12,5 +12,5 @@ <%= render "services", services: trip.bus.services %> <% end %> - <%= render "delimiter" %> + ==================================================== <% end %> diff --git a/case_study.md b/case_study.md index d44b6476..660d40a4 100644 --- a/case_study.md +++ b/case_study.md @@ -43,4 +43,30 @@ task :reload_json, [:file_name] => :environment do |_task, args| p end_time - start_time end -``` \ No newline at end of file +``` + +## Б. Отображение расписаний + +Изначально время: 8329сек + +Сразу видим в rack-mini-profiler 1437 sql-запросов, делаем includes: +```ruby +@trips = Trip.where(from: @from, to: @to).includes(bus: :services).order(:start_time) +``` + +Время: 3930 + +Дальше видно, что очень много partials загружается: + +- убрала `partial` `service` (незачем в отдельном файле рендерить одну строчку, это не бесплатно) + +Время: 2336 + +- убрала аналогично `partial` `delimiter` + +Время: 632 + + + + + From 864a979f551c8216d4108e37fc2a566252899d17 Mon Sep 17 00:00:00 2001 From: Anna Buianova Date: Sat, 15 Feb 2025 12:42:15 +0300 Subject: [PATCH 9/9] Small improvements for import and case study --- .gitignore | 4 ++++ app/services/trips_importer.rb | 8 ++++---- case_study.md | 12 ++++++------ 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 59c74047..c76c4f53 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,7 @@ /tmp /log /public +fixtures/1M.json +fixtures/10M.json +fixtures/*.gz + diff --git a/app/services/trips_importer.rb b/app/services/trips_importer.rb index 73e17014..abf5d339 100644 --- a/app/services/trips_importer.rb +++ b/app/services/trips_importer.rb @@ -68,12 +68,12 @@ def call # пока не стала батчить, т.к. 
и так довольно быстро происходит Trip.insert_all trips - # тоже uniqueness надо добавить - # вот такой нелегальный insert buses_services + # вот такой insert into buses_services # вообще чаще используем has_and_belongs_to_many , потому что часто в связке потом нужны доп. данные и таймстемпы, и свой id - # тогда можно было бы insert использовать + # тогда можно было бы insert_all использовать if buses_services.present? - values = buses_services.map { |arr| "(#{arr[0]}, #{arr[1]})" }.join(", ") + # что-то решила на всякий случай подготовить строки ) + values = buses_services.map { |arr| sprintf("(%s, %s)", arr[0], arr[1]) }.join(", ") sql = "INSERT INTO buses_services (bus_id, service_id) VALUES #{values};" ActiveRecord::Base.connection.execute(sql) end diff --git a/case_study.md b/case_study.md index 660d40a4..bcc0edee 100644 --- a/case_study.md +++ b/case_study.md @@ -18,7 +18,7 @@ - попробовала рубипрофом попрофилировать, но такое себе - всё размазано, и так видно, что 100500 запросов идёт - попробовала проверить, какая часть занимает много времени, комментируя куски кода и запуская, в принципе довольно наглядно. Понятно, что insert trips и sessions долго работает (ожидаемо) - также смотрела рельсовые логи, видно, что также идёт 100500 мелких запросов -- также померяла, что сама загрузка json занимает не так много времени, поэтому уж пару раз можно загрузить-пройтись +- также померяла, что сама загрузка (parse и обход) json занимает не так много времени, поэтому уж пару раз можно пройтись ### Оптимизация @@ -26,7 +26,7 @@ Далее пройтись ещё раз, подготовить данные по поездкам и услугам автобусов, и уже вставить их отдельно. -Эта оптимизация была эффективной - файл large стал обрабатываться за ~ 3,35 секунды. +Эта оптимизация была эффективной - файл large стал обрабатываться за ~ 3-3,35 секунды. 
``` anna@vivosaurus:~/apps/rails-optimization-task3$ be rake reload_json[fixtures/large.json] @@ -45,6 +45,8 @@ task :reload_json, [:file_name] => :environment do |_task, args| end ``` +Решила рискнуть и попробовать на `1M.json`, получилось за 30 секунд. Как так? Все записи на месте в бд. + ## Б. Отображение расписаний Изначально время: 8329сек @@ -64,9 +66,7 @@ end - убрала аналогично `partial` `delimiter` -Время: 632 - - - +Время: 632 +Решила, что этого хватит.