Skip to content

Commit c06650d

Browse files
authored
DRIVERS-2035 Use minimum RTT for CSOT maxTimeMS calculation instead of 90th percentile (#1350)
Require at least 2 RTT samples, otherwise use 0 as RTT. Only keep last 10 samples. Update tests to wait for multiple RTTs.
1 parent 745e486 commit c06650d

File tree

6 files changed

+323
-86
lines changed

6 files changed

+323
-86
lines changed

source/client-side-operations-timeout/client-side-operations-timeout.rst

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ Command Execution
299299
~~~~~~~~~~~~~~~~~
300300

301301
If ``timeoutMS`` is set, drivers MUST append a ``maxTimeMS`` field to
302-
commands executed against a MongoDB server using the 90th percentile RTT of
302+
commands executed against a MongoDB server using the ``minRoundTripTime`` field of
303303
the selected server. Note that this value MUST be retrieved during server
304304
selection using the ``servers`` field of the same `TopologyDescription
305305
<../server-discovery-and-monitoring/server-discovery-and-monitoring.rst#TopologyDescription>`__
@@ -309,17 +309,17 @@ server is reset to the default description (e.g. due to an error in the
309309
monitoring thread) after it has been selected but before the RTT is
310310
retrieved.
311311

312-
If the 90th percentile RTT of the selected server is less than the remaining
313-
timeoutMS, the value of this field MUST be ``remaining timeoutMS - 90th
314-
percentile RTT``. If not, drivers MUST return a timeout error without
312+
If the ``minRoundTripTime`` is less than the remaining ``timeoutMS``,
313+
the value of this field MUST be ``remaining timeoutMS - minRoundTripTime``.
314+
If not, drivers MUST return a timeout error without
315315
attempting to send the message to the server. This is done to ensure that an
316316
operation is not routed to the server if it will likely fail with a socket
317317
timeout as that could cause connection churn. The ``maxTimeMS`` field MUST be
318318
appended after all blocking work is complete.
319319

320320
After wire message construction, drivers MUST check for timeout before
321321
writing the message to the server. If the timeout has expired or the amount
322-
of time remaining is less than the selected server's 90th percentile RTT,
322+
of time remaining is less than the selected server's ``minRoundTripTime``,
323323
drivers MUST return the connection to the pool and raise a timeout exception.
324324
Otherwise, drivers MUST set the connection’s write timeout to the remaining
325325
``timeoutMS`` value before writing a message to the server. After the write
@@ -899,6 +899,16 @@ introduce a new knob and increase the API surface of drivers without providing
899899
a significant benefit.
900900

901901

902+
Drivers use minimum RTT to short circuit operations
903+
---------------------------------------------------
904+
905+
A previous version of this spec used the 90th percentile RTT to short
906+
circuit operations that might otherwise fail with a socket timeout.
907+
We decided to change this logic to avoid canceling operations that may
908+
have a high chance of succeeding, and also to remove a dependency on t-digest.
909+
Instead, drivers use the minimum RTT from the last 10 samples, or 0 until
910+
at least 2 samples have been recorded.
911+
902912
Future work
903913
===========
904914

@@ -922,3 +932,4 @@ Changelog
922932

923933
:2022-10-05: Remove spec front matter.
924934
:2022-01-19: Initial version.
935+
:2022-11-17: Use minimum RTT for maxTimeMS calculation instead of 90th percentile RTT.

source/client-side-operations-timeout/tests/command-execution.json

Lines changed: 170 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
"schemaVersion": "1.9",
44
"runOnRequirements": [
55
{
6-
"minServerVersion": "4.9"
6+
"minServerVersion": "4.9",
7+
"topologies": [
8+
"single",
9+
"replicaset",
10+
"sharded-replicaset",
11+
"sharded"
12+
],
13+
"serverless": "forbid"
714
}
815
],
916
"createEntities": [
@@ -45,7 +52,7 @@
4552
],
4653
"appName": "reduceMaxTimeMSTest",
4754
"blockConnection": true,
48-
"blockTimeMS": 20
55+
"blockTimeMS": 50
4956
}
5057
}
5158
}
@@ -61,7 +68,9 @@
6168
"useMultipleMongoses": false,
6269
"uriOptions": {
6370
"appName": "reduceMaxTimeMSTest",
64-
"w": 1
71+
"w": 1,
72+
"timeoutMS": 500,
73+
"heartbeatFrequencyMS": 500
6574
},
6675
"observeEvents": [
6776
"commandStartedEvent"
@@ -75,33 +84,31 @@
7584
"databaseName": "test"
7685
}
7786
},
78-
{
79-
"collection": {
80-
"id": "regularCollection",
81-
"database": "database",
82-
"collectionName": "coll"
83-
}
84-
},
8587
{
8688
"collection": {
8789
"id": "timeoutCollection",
8890
"database": "database",
89-
"collectionName": "timeoutColl",
90-
"collectionOptions": {
91-
"timeoutMS": 60
92-
}
91+
"collectionName": "timeoutColl"
9392
}
9493
}
9594
]
9695
}
9796
},
9897
{
9998
"name": "insertOne",
100-
"object": "regularCollection",
99+
"object": "timeoutCollection",
101100
"arguments": {
102101
"document": {
103102
"_id": 1
104-
}
103+
},
104+
"timeoutMS": 100000
105+
}
106+
},
107+
{
108+
"name": "wait",
109+
"object": "testRunner",
110+
"arguments": {
111+
"ms": 1000
105112
}
106113
},
107114
{
@@ -123,10 +130,7 @@
123130
"commandName": "insert",
124131
"databaseName": "test",
125132
"command": {
126-
"insert": "coll",
127-
"maxTimeMS": {
128-
"$$exists": false
129-
}
133+
"insert": "timeoutColl"
130134
}
131135
}
132136
},
@@ -137,7 +141,7 @@
137141
"command": {
138142
"insert": "timeoutColl",
139143
"maxTimeMS": {
140-
"$$lte": 60
144+
"$$lte": 450
141145
}
142146
}
143147
}
@@ -164,7 +168,7 @@
164168
],
165169
"appName": "rttTooHighTest",
166170
"blockConnection": true,
167-
"blockTimeMS": 20
171+
"blockTimeMS": 50
168172
}
169173
}
170174
}
@@ -180,7 +184,9 @@
180184
"useMultipleMongoses": false,
181185
"uriOptions": {
182186
"appName": "rttTooHighTest",
183-
"w": 1
187+
"w": 1,
188+
"timeoutMS": 10,
189+
"heartbeatFrequencyMS": 500
184190
},
185191
"observeEvents": [
186192
"commandStartedEvent"
@@ -196,31 +202,153 @@
196202
},
197203
{
198204
"collection": {
199-
"id": "regularCollection",
205+
"id": "timeoutCollection",
200206
"database": "database",
201-
"collectionName": "coll"
207+
"collectionName": "timeoutColl"
208+
}
209+
}
210+
]
211+
}
212+
},
213+
{
214+
"name": "insertOne",
215+
"object": "timeoutCollection",
216+
"arguments": {
217+
"document": {
218+
"_id": 1
219+
},
220+
"timeoutMS": 100000
221+
}
222+
},
223+
{
224+
"name": "wait",
225+
"object": "testRunner",
226+
"arguments": {
227+
"ms": 1000
228+
}
229+
},
230+
{
231+
"name": "insertOne",
232+
"object": "timeoutCollection",
233+
"arguments": {
234+
"document": {
235+
"_id": 2
236+
}
237+
},
238+
"expectError": {
239+
"isTimeoutError": true
240+
}
241+
},
242+
{
243+
"name": "insertOne",
244+
"object": "timeoutCollection",
245+
"arguments": {
246+
"document": {
247+
"_id": 3
248+
}
249+
},
250+
"expectError": {
251+
"isTimeoutError": true
252+
}
253+
},
254+
{
255+
"name": "insertOne",
256+
"object": "timeoutCollection",
257+
"arguments": {
258+
"document": {
259+
"_id": 4
260+
}
261+
},
262+
"expectError": {
263+
"isTimeoutError": true
264+
}
265+
}
266+
],
267+
"expectEvents": [
268+
{
269+
"client": "client",
270+
"events": [
271+
{
272+
"commandStartedEvent": {
273+
"commandName": "insert",
274+
"databaseName": "test",
275+
"command": {
276+
"insert": "timeoutColl"
277+
}
278+
}
279+
}
280+
]
281+
}
282+
]
283+
},
284+
{
285+
"description": "short-circuit is not enabled with only 1 RTT measurement",
286+
"operations": [
287+
{
288+
"name": "failPoint",
289+
"object": "testRunner",
290+
"arguments": {
291+
"client": "failPointClient",
292+
"failPoint": {
293+
"configureFailPoint": "failCommand",
294+
"mode": "alwaysOn",
295+
"data": {
296+
"failCommands": [
297+
"hello",
298+
"isMaster"
299+
],
300+
"appName": "reduceMaxTimeMSTest",
301+
"blockConnection": true,
302+
"blockTimeMS": 100
303+
}
304+
}
305+
}
306+
},
307+
{
308+
"name": "createEntities",
309+
"object": "testRunner",
310+
"arguments": {
311+
"entities": [
312+
{
313+
"client": {
314+
"id": "client",
315+
"useMultipleMongoses": false,
316+
"uriOptions": {
317+
"appName": "reduceMaxTimeMSTest",
318+
"w": 1,
319+
"timeoutMS": 90,
320+
"heartbeatFrequencyMS": 100000
321+
},
322+
"observeEvents": [
323+
"commandStartedEvent"
324+
]
325+
}
326+
},
327+
{
328+
"database": {
329+
"id": "database",
330+
"client": "client",
331+
"databaseName": "test"
202332
}
203333
},
204334
{
205335
"collection": {
206336
"id": "timeoutCollection",
207337
"database": "database",
208-
"collectionName": "timeoutColl",
209-
"collectionOptions": {
210-
"timeoutMS": 2
211-
}
338+
"collectionName": "timeoutColl"
212339
}
213340
}
214341
]
215342
}
216343
},
217344
{
218345
"name": "insertOne",
219-
"object": "regularCollection",
346+
"object": "timeoutCollection",
220347
"arguments": {
221348
"document": {
222349
"_id": 1
223-
}
350+
},
351+
"timeoutMS": 100000
224352
}
225353
},
226354
{
@@ -230,9 +358,6 @@
230358
"document": {
231359
"_id": 2
232360
}
233-
},
234-
"expectError": {
235-
"isTimeoutError": true
236361
}
237362
}
238363
],
@@ -245,9 +370,18 @@
245370
"commandName": "insert",
246371
"databaseName": "test",
247372
"command": {
248-
"insert": "coll",
373+
"insert": "timeoutColl"
374+
}
375+
}
376+
},
377+
{
378+
"commandStartedEvent": {
379+
"commandName": "insert",
380+
"databaseName": "test",
381+
"command": {
382+
"insert": "timeoutColl",
249383
"maxTimeMS": {
250-
"$$exists": false
384+
"$$lte": 450
251385
}
252386
}
253387
}

0 commit comments

Comments
 (0)