Skip to content

Commit 61abedd

Browse files
authored
Merge pull request #4298 from mohanasudhan/v3.1.x-fix4131
Btl tcp: Fix racing condition on simultaneous handshake [Backporting the fix to branch v3.1.x]
2 parents 8d804d3 + 81ac30d commit 61abedd

File tree

5 files changed

+21
-30
lines changed

5 files changed

+21
-30
lines changed

.mailmap

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,5 @@ Thomas Naughton <[email protected]> <[email protected]>
109109
110110

111111
Anandhi S Jayakumar <[email protected]>
112+
113+
Mohan Gandhi <[email protected]>

opal/mca/btl/tcp/btl_tcp.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -546,8 +546,9 @@ int mca_btl_tcp_recv_blocking(int sd, void* data, size_t size)
546546
int retval = recv(sd, ((char *)ptr) + cnt, size - cnt, 0);
547547
/* remote closed connection */
548548
if (0 == retval) {
549-
BTL_ERROR(("remote peer unexpectedly closed connection while I was waiting for blocking message"));
550-
return -1;
549+
OPAL_OUTPUT_VERBOSE((100, opal_btl_base_framework.framework_output,
550+
"remote peer unexpectedly closed connection while I was waiting for a blocking message"));
551+
break;
551552
}
552553

553554
/* socket is non-blocking so handle errors */
@@ -556,7 +557,7 @@ int mca_btl_tcp_recv_blocking(int sd, void* data, size_t size)
556557
opal_socket_errno != EAGAIN &&
557558
opal_socket_errno != EWOULDBLOCK) {
558559
BTL_ERROR(("recv(%d) failed: %s (%d)", sd, strerror(opal_socket_errno), opal_socket_errno));
559-
return -1;
560+
break;
560561
}
561562
continue;
562563
}
@@ -568,8 +569,8 @@ int mca_btl_tcp_recv_blocking(int sd, void* data, size_t size)
568569

569570
/*
570571
* A blocking send on a non-blocking socket. Used to send the small
571-
* amount of connection information that identifies the endpoints
572-
* endpoint.
572+
* amount of connection information used during the initial handshake
573+
* (magic string plus process guid)
573574
*/
574575

575576
int mca_btl_tcp_send_blocking(int sd, const void* data, size_t size)

opal/mca/btl/tcp/btl_tcp_component.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,12 +1355,10 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
13551355
char str[128];
13561356

13571357
/* Note, Socket will be in blocking mode during intial handshake
1358-
* hence setting SO_RCVTIMEO to say 2 seconds here to avoid chance
1359-
* of spin forever if it tries to connect to old version
1360-
* as older version will send just process id which won't be long enough
1361-
* to cross sizeof(str) length + process id struct
1362-
* or when the remote side isn't OMPI where it's not going to send
1363-
* any data*/
1358+
* hence setting SO_RCVTIMEO to say 2 seconds here to avoid waiting
1359+
* forever when connecting to older versions (that reply to the
1360+
* handshake with only the guid) or when the remote side isn't OMPI
1361+
*/
13641362

13651363
/* get the current timeout value so we can reset to it */
13661364
if (0 != getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &rcvtimeo_save_len)) {
@@ -1387,7 +1385,6 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
13871385

13881386
OBJ_RELEASE(event);
13891387
retval = mca_btl_tcp_recv_blocking(sd, (void *)&hs_msg, sizeof(hs_msg));
1390-
guid = hs_msg.guid;
13911388

13921389
/* An unknown process attempted to connect to Open MPI via TCP.
13931390
* Open MPI uses a "magic" string to trivially verify that the connecting
@@ -1413,6 +1410,8 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
14131410
CLOSE_THE_SOCKET(sd);
14141411
return;
14151412
}
1413+
1414+
guid = hs_msg.guid;
14161415
if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) {
14171416
opal_output_verbose(20, opal_btl_base_framework.framework_output,
14181417
"process did not receive right magic string. "

opal/mca/btl/tcp/btl_tcp_endpoint.c

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -574,20 +574,6 @@ static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoint
574574
}
575575

576576

577-
/*
578-
* A blocking recv on a non-blocking socket. Used to receive the small
579-
* amount of connection information that identifies the remote endpoint (guid).
580-
*/
581-
static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpoint, void* data, size_t size)
582-
{
583-
int ret = mca_btl_tcp_recv_blocking(btl_endpoint->endpoint_sd, data, size);
584-
if (ret <= 0) {
585-
mca_btl_tcp_endpoint_close(btl_endpoint);
586-
}
587-
return ret;
588-
}
589-
590-
591577
/*
592578
* Receive the endpoints globally unique process identification from a newly
593579
* connected socket and verify the expected response. If so, move the
@@ -604,9 +590,10 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
604590
opal_process_name_t guid;
605591

606592
mca_btl_tcp_endpoint_hs_msg_t hs_msg;
607-
retval = mca_btl_tcp_endpoint_recv_blocking(btl_endpoint, &hs_msg, sizeof(hs_msg));
593+
retval = mca_btl_tcp_recv_blocking(btl_endpoint->endpoint_sd, &hs_msg, sizeof(hs_msg));
608594

609595
if (sizeof(hs_msg) != retval) {
596+
mca_btl_tcp_endpoint_close(btl_endpoint);
610597
if (0 == retval) {
611598
/* If we get zero bytes, the peer closed the socket. This
612599
can happen when the two peers started the connection

opal/mca/btl/tcp/btl_tcp_proc.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -486,10 +486,10 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
486486
}
487487

488488
/*
489-
* in case one of the peer addresses is already in use,
489+
* in case the peer address has all intended connections,
490490
* mark the complete peer interface as 'not available'
491491
*/
492-
if(endpoint_addr->addr_inuse) {
492+
if(endpoint_addr->addr_inuse >= mca_btl_tcp_component.tcp_num_links) {
493493
peer_interfaces[index]->inuse = 1;
494494
}
495495

@@ -812,7 +812,9 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr
812812
OPAL_THREAD_LOCK(&btl_proc->proc_lock);
813813
for( size_t i = 0; i < btl_proc->proc_endpoint_count; i++ ) {
814814
mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i];
815-
/* Check all conditions before going to try to accept the connection. */
815+
/* We are not here to make a decision about what is good socket
816+
* and what is not. We simply check that this socket fit the endpoint
817+
* end we prepare for the real decision function mca_btl_tcp_endpoint_accept. */
816818
if( btl_endpoint->endpoint_addr->addr_family != addr->sa_family ) {
817819
continue;
818820
}

0 commit comments

Comments
 (0)