From d7c9754f9824def2ae14db9b6f4f8d797237537a Mon Sep 17 00:00:00 2001 From: Eugene Kozlov Date: Thu, 18 Apr 2024 10:17:05 -0400 Subject: [PATCH] Use TCP keepalive to detect broken sockets vsomeip does not enforce a single socket between endpoints; it does not actively close an existing socket after a new one is established. It is possible that: - socket is established from nodeA to nodeB - ethernet gets unplugged - nodeA tries to transmit, times out, closes the socket - ethernet is reconnected - nodeA establishes a new socket to nodeB nodeB never received an RST while the cable was unplugged, so it still considers the old socket established and effectively "leaks" it until TCP keepalive detects the break. At default values this takes 2 hr 10 min. This can be mitigated by reducing the keepalive settings. The implementation of TCP keepalive differs between operating systems: - Linux can configure all 3 settings on a per-socket basis - QNX can configure 1 setting per-socket and 2 settings globally QNX would need to set the global values using sysctl, perhaps at startup. --- .../src/tcp_server_endpoint_impl.cpp | 59 ++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/implementation/endpoints/src/tcp_server_endpoint_impl.cpp b/implementation/endpoints/src/tcp_server_endpoint_impl.cpp index f83252ae2..96dfe1e7a 100644 --- a/implementation/endpoints/src/tcp_server_endpoint_impl.cpp +++ b/implementation/endpoints/src/tcp_server_endpoint_impl.cpp @@ -5,7 +5,7 @@ // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#include - +#include #include #include @@ -272,6 +272,63 @@ void tcp_server_endpoint_impl::accept_cbk(const connection::ptr& _connection, VSOMEIP_WARNING << "tcp_server_endpoint::connect: couldn't enable " << "keep_alive: " << its_error.message(); } + + // Enable sending of keep-alive messages on connection-oriented sockets + // Enables (nonzero) or disables (zero) the periodic transmission of messages + // Should the connected party fail to respond to these messages, the connection is considered broken + int optval = 1; + int rc; + rc = setsockopt(new_connection_socket.native_handle(), SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval)); + if (rc != 0) { + VSOMEIP_WARNING << "tcp_server_endpoint::connect: couldn't enable keep_alive(10)" ; + } +#ifndef __QNX__ + // Linux has 3 parameters configurable on a both a global and per-socket basis + // TCP_KEEPIDLE / tcp_keepalive_time + // TCP_KEEPCNT / tcp_keepalive_probes + // TCP_KEEPINTVL / tcp_keepalive_intvl + // + // Adjust these on the vsomeip socket to detect breaks within IDLE + (CNT * INTVL) seconds + + // The time (in seconds) the connection needs to remain idle before TCP starts sending keepalive probes + optval = 10; + rc = setsockopt(new_connection_socket.native_handle(), IPPROTO_TCP, TCP_KEEPIDLE, &optval, sizeof(optval)); + if (rc != 0) { + VSOMEIP_WARNING << "tcp_server_endpoint::connect: couldn't enable keep_alive(TCP_KEEPIDLE)" ; + } + + // The maximum count of keepalive probes TCP should send before dropping the connection + optval = 2; + rc = setsockopt(new_connection_socket.native_handle(), IPPROTO_TCP, TCP_KEEPCNT, &optval, sizeof(optval)); + if (rc != 0) { + VSOMEIP_WARNING << "tcp_server_endpoint::connect: couldn't enable keep_alive(TCP_KEEPCNT)" ; + } + + // The time (in seconds) between individual keepalive probes + optval = 2; + rc = setsockopt(new_connection_socket.native_handle(), IPPROTO_TCP, TCP_KEEPINTVL, &optval, sizeof(optval)); + if (rc != 0) { + VSOMEIP_WARNING << 
"tcp_server_endpoint::connect: couldn't enable keep_alive(TCP_KEEPINTVL)" ; + } +#else + // QNX has 1 parameter configurable on a per-socket basis + // TCP_KEEPALIVE (documented full-seconds) + // and 3 parameters configurable on a global basis as defaults + // sysctl net.inet.tcp.keepidle (half-seconds) + // TCPCTL_KEEPINTVL / sysctl net.inet.tcp.keepintvl (half-seconds) + // TCPCTL_KEEPCNT / sysctl net.inet.tcp.keepcnt + // + // Adjust TCP_KEEPALIVE on the vsomeip socket + // Rely on reasonable global sysctl for CNT and INTVL + // Detect breaks within IDLE + (CNT * INTVL) seconds + + // The time (in seconds) the connection needs to remain idle before TCP starts sending keepalive probes + optval = 10; + rc = setsockopt(new_connection_socket.native_handle(), IPPROTO_TCP, TCP_KEEPALIVE, &optval, sizeof(optval)); + if (rc != 0) { + VSOMEIP_WARNING << "tcp_server_endpoint::connect: couldn't enable keep_alive(TCP_KEEPALIVE)" ; + } +#endif } if (!its_error) { {