From 23a08ab59563e243c9f31578628e5070e2d14ac4 Mon Sep 17 00:00:00 2001 From: Connor Shea <2977353+connorshea@users.noreply.github.com> Date: Thu, 2 Jul 2026 15:50:12 -0600 Subject: [PATCH] Optimize Internet username, email, password, and base64 - username: pick the username shape (first-name-only vs first+last) before generating names, so the discarded variant is never built. - password: reuse frozen digit/special-char tables and grow the character bag in place with concat instead of += (which copies the whole array each time). - base64: reuse frozen urlsafe/standard character tables and append into a pre-sized String instead of building an intermediate array. - sanitize_email_local_part: replace invalid characters with a single gsub against a character-class regex instead of rebuilding the allowed-character array and scanning it per character. Benchmark (Ruby 3.4.9, arm64-darwin25, benchmark-ips): require 'benchmark/ips' require 'faker' Benchmark.ips do |x| x.config(warmup: 1, time: 2) x.report('username') { Faker::Internet.username } x.report('email') { Faker::Internet.email } x.report('password') { Faker::Internet.password } x.report('base64') { Faker::Internet.base64 } end Results: main: username 41.048k (+/-14.7%) i/s email 21.683k (+/- 0.7%) i/s password 394.792k (+/- 4.8%) i/s base64 183.509k (+/- 2.4%) i/s this commit: username 79.313k (+/-18.2%) i/s (~1.9x) email 37.246k (+/- 1.3%) i/s (~1.7x) password 451.683k (+/- 1.0%) i/s (~1.14x) base64 462.621k (+/- 0.5%) i/s (~2.5x) --- lib/faker/internet.rb | 67 ++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/lib/faker/internet.rb b/lib/faker/internet.rb index 429702622d..f4f1cc54fb 100644 --- a/lib/faker/internet.rb +++ b/lib/faker/internet.rb @@ -18,6 +18,23 @@ class Internet < Base [198..198, 18..19, 0..255, 1..255] # 198.18.0.0/15 - Used for benchmark testing of inter-network communications between subnets ].each(&:freeze).freeze + # Characters that are 
not allowed in the local part of an email address, + # i.e. anything other than ASCII alphanumerics, the period, and !#$%&'*+-/=?^_`{|}~ + # @private + INVALID_EMAIL_LOCAL_PART_CHARS = %r{[^0-9A-Za-z!#$%&'*+\-/=?^_`{|}~.]} + + # Character sets used by #base64 + # @private + BASE64_URLSAFE_CHARS = [*'0'..'9', *'A'..'Z', *'a'..'z', '-', '_'].freeze + # @private + BASE64_CHARS = [*'0'..'9', *'A'..'Z', *'a'..'z', '+', '/'].freeze + + # Character sets used by #password + # @private + PASSWORD_DIGITS = Array('0'..'9').freeze + # @private + PASSWORD_SPECIAL_CHARS = %w[! @ # $ % ^ & *].freeze + class << self ## # Returns the email address @@ -96,12 +113,15 @@ def username(specifier: nil, separators: %w[. _]) return result[0...specifier.max] end - sample([ - Char.prepare(Faker::Name.first_name), - [Faker::Name.first_name, Faker::Name.last_name].map do |name| - Char.prepare(name) - end.join(sample(separators)) - ]) + # Pick the shape of the username first so we only generate the + # name parts we actually need. + if rand(2).zero? + Char.prepare(Faker::Name.first_name) + else + [Faker::Name.first_name, Faker::Name.last_name].map do |name| + Char.prepare(name) + end.join(sample(separators)) + end end end @@ -154,22 +174,22 @@ def password(min_length: 8, max_length: 16, mix_case: true, special_characters: # use lower_chars by default and add upper_chars if mix_case lower_chars = self::LLetters password << sample(lower_chars) - character_bag += lower_chars + character_bag.concat(lower_chars) - digits = ('0'..'9').to_a + digits = PASSWORD_DIGITS password << sample(digits) - character_bag += digits + character_bag.concat(digits) if mix_case upper_chars = self::ULetters password << sample(upper_chars) - character_bag += upper_chars + character_bag.concat(upper_chars) end if special_characters - special_chars = %w[! 
@ # $ % ^ & *] + special_chars = PASSWORD_SPECIAL_CHARS password << sample(special_chars) - character_bag += special_chars + character_bag.concat(special_chars) end password << sample(character_bag) while password.length < target_length @@ -528,14 +548,10 @@ def uuid # # @faker.version 2.11.0 def base64(length: 16, padding: false, urlsafe: true) - char_range = [ - Array('0'..'9'), - Array('A'..'Z'), - Array('a'..'z'), - urlsafe ? %w[- _] : %w[+ /] - ].flatten - s = Array.new(length) { sample(char_range) }.join - s += '=' if padding + char_range = urlsafe ? BASE64_URLSAFE_CHARS : BASE64_CHARS + s = ::String.new('', capacity: length + 1) + length.times { s << sample(char_range) } + s << '=' if padding s end @@ -562,16 +578,7 @@ def user(*args) private def sanitize_email_local_part(local_part) - char_range = [ - Array('0'..'9'), - Array('A'..'Z'), - Array('a'..'z'), - "!#$%&'*+-/=?^_`{|}~.".chars - ].flatten - - local_part.chars.map do |char| - char_range.include?(char) ? char : '#' - end.join + local_part.gsub(INVALID_EMAIL_LOCAL_PART_CHARS, '#') end def construct_email(local_part, domain_name)