Skip to content

Commit 584fb4c

Browse files
authored
RUBY-1563 Poll SRV records in unknown topologies (#1524)
* RUBY-1563 Monitor unknown topologies as the spec requires
* RUBY-1563 Add a docstring
* RUBY-1563 Test unknown to sharded transition
* RUBY-1563 Stop SRV monitor in RS and single topologies
* RUBY-1563 Test unknown topologies in reconnect test
* Wait for server check to complete to ensure topology is changed
* Give up on jruby here
1 parent 84c5bb5 commit 584fb4c

File tree

4 files changed

+312
-46
lines changed

4 files changed

+312
-46
lines changed

lib/mongo/cluster.rb

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ def initialize(seeds, monitoring, options = Options::Redacted.new)
220220
server_selection_semaphore.wait([time_remaining, 1].min)
221221
end
222222
end
223+
224+
start_stop_srv_monitor
223225
end
224226

225227
# Create a cluster for the provided client, for use when we don't want the
@@ -562,6 +564,8 @@ def run_sdam_flow(previous_desc, updated_desc, options = {})
562564
end
563565
end
564566
end
567+
568+
start_stop_srv_monitor
565569
end
566570

567571
# Some updated descriptions, e.g. a mismatched me one, result in the
@@ -578,8 +582,6 @@ def run_sdam_flow(previous_desc, updated_desc, options = {})
578582
unless updated_desc.unknown?
579583
server_selection_semaphore.broadcast
580584
end
581-
582-
check_and_start_srv_monitor
583585
end
584586

585587
# Sets the list of servers to the addresses in the provided list of address
@@ -824,19 +826,35 @@ def sessions_supported?
824826
end
825827

826828
# @api private
827-
def check_and_start_srv_monitor
828-
return unless topology.is_a?(Topology::Sharded) && options[:srv_uri]
829-
@srv_monitor_lock.synchronize do
830-
unless @srv_monitor
831-
monitor_options = options.merge(
832-
timeout: options[:connect_timeout] || Server::CONNECT_TIMEOUT)
833-
@srv_monitor = _srv_monitor = SrvMonitor.new(self, monitor_options)
834-
finalizer = lambda do
835-
_srv_monitor.stop!
829+
def start_stop_srv_monitor
830+
# SRV URI is either always given or not for a given cluster; if one
831+
# wasn't given we shouldn't ever have an SRV monitor to manage.
832+
return unless options[:srv_uri]
833+
834+
if topology.is_a?(Topology::Sharded) || topology.is_a?(Topology::Unknown)
835+
# Start SRV monitor
836+
@srv_monitor_lock.synchronize do
837+
unless @srv_monitor
838+
monitor_options = options.merge(
839+
timeout: options[:connect_timeout] || Server::CONNECT_TIMEOUT)
840+
@srv_monitor = _srv_monitor = SrvMonitor.new(self, monitor_options)
841+
finalizer = lambda do
842+
_srv_monitor.stop!
843+
end
844+
ObjectSpace.define_finalizer(self, finalizer)
845+
end
846+
@srv_monitor.run!
847+
end
848+
else
849+
# Stop SRV monitor if running. This path is taken when the client
850+
# is given an SRV URI to a standalone/replica set; when the topology
851+
# is discovered, since it's not a sharded cluster, the SRV monitor
852+
# needs to be stopped.
853+
@srv_monitor_lock.synchronize do
854+
if @srv_monitor
855+
@srv_monitor.stop!
836856
end
837-
ObjectSpace.define_finalizer(self, finalizer)
838857
end
839-
@srv_monitor.run!
840858
end
841859
end
842860
end

lib/mongo/srv/monitor.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
module Mongo
1616
module SRV
1717

18+
# Polls SRV records for the URI that a cluster was created for and
19+
# updates the list of servers in the cluster when records change.
20+
#
21+
# @api private
1822
class Monitor
1923
include Loggable
2024

spec/integration/reconnect_spec.rb

Lines changed: 92 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,7 @@
5151
end
5252
end
5353

54-
context 'in sharded topology' do
55-
require_topology :sharded
56-
require_default_port_deployment
57-
require_multi_shard
54+
context 'SRV monitor thread' do
5855

5956
let(:uri) do
6057
"mongodb+srv://test1.test.build.10gen.cc/?tls=#{SpecConfig.instance.ssl?}&tlsInsecure=true".tap do |uri|
@@ -75,37 +72,103 @@
7572
logger: logger))
7673
end
7774

78-
it 'recreates srv monitor' do
75+
let(:wait_for_discovery) do
7976
client.cluster.next_primary
80-
if BSON::Environment.jruby?
81-
# Wait for jruby to start SRV monitor thread
82-
sleep 1
77+
end
78+
79+
let(:wait_for_discovery_again) do
80+
client.cluster.next_primary
81+
end
82+
83+
shared_examples_for 'recreates SRV monitor' do
84+
# JRuby produces this error:
85+
# RSpec::Expectations::ExpectationNotMetError: expected nil to respond to `alive?`
86+
# for this assertion:
87+
# expect(thread).not_to be_alive
88+
# This is bizarre because if thread was nil, the earlier call to
89+
# thread.kill should've similarly failed, but it doesn't.
90+
fails_on_jruby
91+
92+
it 'recreates SRV monitor' do
93+
wait_for_discovery
94+
95+
expect(client.cluster.topology).to be_a(expected_topology_cls)
96+
thread = client.cluster.srv_monitor.instance_variable_get('@thread')
97+
expect(thread).to be_alive
98+
99+
thread.kill
100+
# context switch to let the thread get killed
101+
sleep 0.1
102+
expect(thread).not_to be_alive
103+
104+
client.reconnect
105+
106+
wait_for_discovery_again
107+
108+
new_thread = client.cluster.srv_monitor.instance_variable_get('@thread')
109+
expect(new_thread).not_to eq(thread)
110+
expect(new_thread).to be_alive
83111
end
84-
expect(client.cluster.topology).to be_a(Mongo::Cluster::Topology::Sharded)
85-
thread = client.cluster.srv_monitor.instance_variable_get('@thread')
86-
expect(thread).to be_alive
112+
end
87113

88-
thread.kill
89-
# context switch to let the thread get killed
90-
sleep 0.1
91-
if BSON::Environment.jruby?
92-
# jruby takes a long time here as well
93-
15.times do
94-
if thread.alive?
95-
sleep 1
96-
else
97-
break
98-
end
99-
end
114+
context 'in sharded topology' do
115+
require_topology :sharded
116+
require_default_port_deployment
117+
require_multi_shard
118+
119+
let(:expected_topology_cls) { Mongo::Cluster::Topology::Sharded }
120+
121+
it_behaves_like 'recreates SRV monitor'
122+
end
123+
124+
context 'in unknown topology' do
125+
126+
# JRuby apparently does not implement non-blocking UDP I/O, which is used
127+
# by RubyDNS:
128+
# NotImplementedError: recvmsg_nonblock is not implemented
129+
fails_on_jruby
130+
131+
let(:uri) do
132+
"mongodb+srv://test-fake.test.build.10gen.cc/"
100133
end
101-
expect(thread).not_to be_alive
102134

103-
client.reconnect
135+
let(:client) do
136+
ClientRegistry.instance.register_local_client(
137+
Mongo::Client.new(uri, server_selection_timeout: 3.89,
138+
resolv_options: {
139+
nameserver: 'localhost',
140+
nameserver_port: [['localhost', 5300], ['127.0.0.1', 5300]],
141+
},
142+
logger: logger))
143+
end
104144

105-
client.cluster.next_primary
106-
new_thread = client.cluster.srv_monitor.instance_variable_get('@thread')
107-
expect(new_thread).not_to eq(thread)
108-
expect(new_thread).to be_alive
145+
let(:expected_topology_cls) { Mongo::Cluster::Topology::Unknown }
146+
147+
let(:wait_for_discovery) do
148+
# Since the entire test is done in unknown topology, we cannot use
149+
# next_primary to wait for the client to discover the topology.
150+
sleep 5
151+
end
152+
153+
let(:wait_for_discovery_again) do
154+
sleep 5
155+
end
156+
157+
around do |example|
158+
require 'support/dns'
159+
160+
rules = [
161+
['_mongodb._tcp.test-fake.test.build.10gen.cc', :srv,
162+
[0, 0, 2799, 'localhost.test.build.10gen.cc'],
163+
],
164+
]
165+
166+
mock_dns(rules) do
167+
example.run
168+
end
169+
end
170+
171+
it_behaves_like 'recreates SRV monitor'
109172
end
110173
end
111174
end

0 commit comments

Comments
 (0)