From 0691c8b4da23defe1b36ec90445852da1714010b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Svensson?= Date: Tue, 30 May 2023 13:28:06 +0200 Subject: [PATCH 1/2] Add a reconnect failure testcase A new testcase is added which simulates a temporary network problem which triggers node reconnects to fail. Update clusterclient to skip empty lines and comments and limit the maxretry to 2 (default 5). --- tests/CMakeLists.txt | 4 + tests/clusterclient.c | 25 +++--- tests/scripts/reconnect-failure-test.sh | 103 ++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 9 deletions(-) create mode 100755 tests/scripts/reconnect-failure-test.sh diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 63e27f4..68a8fa5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -220,6 +220,10 @@ add_test(NAME redirect-with-hostname-test COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/redirect-with-hostname-test.sh" "$" WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") +add_test(NAME reconnect-failure-test + COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/reconnect-failure-test.sh" + "$" + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") # This test can't be run on hiredis v1.1.0 due to hiredis issue #1171. # Disabling the testcase if hiredis contains the issue or if the version is unknown. add_test(NAME redirect-with-hostname-test-async diff --git a/tests/clusterclient.c b/tests/clusterclient.c index 58c5f82..3d214ad 100644 --- a/tests/clusterclient.c +++ b/tests/clusterclient.c @@ -22,23 +22,30 @@ int main(int argc, char **argv) { redisClusterSetOptionAddNodes(cc, initnode); redisClusterSetOptionConnectTimeout(cc, timeout); redisClusterSetOptionRouteUseSlots(cc); + redisClusterSetOptionMaxRetry(cc, 2); if (redisClusterConnect2(cc) != REDIS_OK) { fprintf(stderr, "Connect error: %s\n", cc->errstr); exit(100); } - char command[256]; - while (fgets(command, 256, stdin)) { - size_t len = strlen(command); - if (command[len - 1] == '\n') // Chop trailing line break - command[len - 1] = '\0'; - redisReply *reply = (redisReply *)redisClusterCommand(cc, command); + char cmd[256]; + while (fgets(cmd, 256, stdin)) { + size_t len = strlen(cmd); + if (cmd[len - 1] == '\n') // Chop trailing line break + cmd[len - 1] = '\0'; + + if (cmd[0] == '\0') /* Skip empty lines */ + continue; + if (cmd[0] == '#') /* Skip comments */ + continue; + + redisReply *reply = (redisReply *)redisClusterCommand(cc, cmd); if (cc->err) { - fprintf(stderr, "redisClusterCommand error: %s\n", cc->errstr); - exit(101); + printf("error: %s\n", cc->errstr); + } else { + printf("%s\n", reply->str); } - printf("%s\n", reply->str); freeReplyObject(reply); } diff --git a/tests/scripts/reconnect-failure-test.sh b/tests/scripts/reconnect-failure-test.sh new file mode 100755 index 0000000..8342e68 --- /dev/null +++ b/tests/scripts/reconnect-failure-test.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# +# Simulate a 2 node cluster. +# - Node 1 will handle slotmap updates and redirects. +# - Node 2 will simulate a short and temporary network issue +# that trigger the client to reconnect. +# +# Usage: $0 /path/to/clusterclient-binary + +clientprog=${1:-./clusterclient} +testname=reconnect-failure-test + +# Sync process just waiting for server to be ready to accept connection. +perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' & +syncpid1=$! +perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' & +syncpid2=$! + +# Start simulated redis node #1 +timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' & +# Inital slotmap +EXPECT CONNECT +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 6000, ["127.0.0.1", 7401, "nodeid1"]],[6001, 16383, ["127.0.0.1", 7402, "nodeid2"]]] +EXPECT CLOSE + +EXPECT CONNECT +EXPECT ["SET", "bar", "initial"] +SEND +OK + +# A reconnect failure triggers a search for an available node +EXPECT ["PING"] +SEND +PONG +EXPECT ["SET", "foo", "second"] +SEND -MOVED 12182 127.0.0.1:7402 + +# Since maxretry=2 a MOVED triggers a slotmap update (no slotmap change in this test) +EXPECT CONNECT +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 6000, ["127.0.0.1", 7401, "nodeid1"]],[6001, 16383, ["127.0.0.1", 7402, "nodeid2"]]] +EXPECT CLOSE + +# A reconnect failure triggers a new search for an available node +EXPECT ["PING"] +SEND +PONG + +# Max retry exhausted + +EXPECT CLOSE +EOF +server1=$! + +# Start simulated redis node #2 +timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' & +EXPECT CONNECT +EXPECT ["SET", "foo", "initial"] +SEND +OK +CLOSE +EOF +server2=$! + +# Wait until both nodes are ready to accept client connections +wait $syncpid1 $syncpid2; + +# Run client +timeout 4s "$clientprog" 127.0.0.1:7401 > "$testname.out" <<'EOF' +SET foo initial +SET bar initial + +SET foo first +SET foo second + +EOF +clientexit=$? + +# Wait for servers to exit +wait $server1; server1exit=$? +wait $server2; server2exit=$? + +# Check exit statuses +if [ $server1exit -ne 0 ]; then + echo "Simulated server #1 exited with status $server1exit" + exit $server1exit +fi +if [ $server2exit -ne 0 ]; then + echo "Simulated server #2 exited with status $server2exit" + exit $server2exit +fi +if [ $clientexit -ne 0 ]; then + echo "$clientprog exited with status $clientexit" + exit $clientexit +fi + +# Check the output from clusterclient +expected="OK +OK +error: Server closed the connection +error: too many cluster retries" + +cmp "$testname.out" <(echo "$expected") || exit 99 + +# Clean up +rm "$testname.out" From 41d1039e2dd4348713147fac9bad3cf9c8609bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Svensson?= Date: Tue, 30 May 2023 13:39:54 +0200 Subject: [PATCH 2/2] Return reconnection failures immediately --- hircluster.c | 5 ++++- tests/scripts/reconnect-failure-test.sh | 20 +------------------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/hircluster.c b/hircluster.c index f907e1c..bf80866 100644 --- a/hircluster.c +++ b/hircluster.c @@ -1902,7 +1902,10 @@ redisContext *ctx_get_by_node(redisClusterContext *cc, redisClusterNode *node) { c = node->con; if (c != NULL) { if (c->err) { - redisReconnect(c); + if (redisReconnect(c) != REDIS_OK) { + __redisClusterSetError(cc, c->err, c->errstr); + return NULL; + } if (cc->ssl && cc->ssl_init_fn(c, cc->ssl) != REDIS_OK) { __redisClusterSetError(cc, c->err, c->errstr); diff --git a/tests/scripts/reconnect-failure-test.sh b/tests/scripts/reconnect-failure-test.sh index 8342e68..a889c3b 100755 --- a/tests/scripts/reconnect-failure-test.sh +++ b/tests/scripts/reconnect-failure-test.sh @@ -28,24 +28,6 @@ EXPECT CONNECT EXPECT ["SET", "bar", "initial"] SEND +OK -# A reconnect failure triggers a search for an available node -EXPECT ["PING"] -SEND +PONG -EXPECT ["SET", "foo", "second"] -SEND -MOVED 12182 127.0.0.1:7402 - -# Since maxretry=2 a MOVED triggers a slotmap update (no slotmap change in this test) -EXPECT CONNECT -EXPECT ["CLUSTER", "SLOTS"] -SEND [[0, 6000, ["127.0.0.1", 7401, "nodeid1"]],[6001, 16383, ["127.0.0.1", 7402, "nodeid2"]]] -EXPECT CLOSE - -# A reconnect failure triggers a new search for an available node -EXPECT ["PING"] -SEND +PONG - -# Max retry exhausted - EXPECT CLOSE EOF server1=$! @@ -95,7 +77,7 @@ fi expected="OK OK error: Server closed the connection -error: too many cluster retries" +error: Connection refused" cmp "$testname.out" <(echo "$expected") || exit 99