From d4e1ab135dfcb058b476fce09a669f4be5854702 Mon Sep 17 00:00:00 2001 From: Sooraj Sinha <81695996+soosinha@users.noreply.github.com> Date: Tue, 19 Mar 2024 21:52:43 +0530 Subject: [PATCH] Election scheduler should be cancelled after cluster state publication (#11699) Signed-off-by: Sooraj Sinha --- CHANGELOG.md | 1 + .../cluster/coordination/Coordinator.java | 20 +++++++++++++++---- .../coordination/CoordinatorTests.java | 4 ++-- .../AbstractCoordinatorTestCase.java | 7 +++++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4558cf3fe251..3fb141e189bd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -146,6 +146,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Allow composite aggregation to run under a parent filter aggregation ([#11499](https://github.com/opensearch-project/OpenSearch/pull/11499)) - Quickly compute terms aggregations when the top-level query is functionally match-all for a segment ([#11643](https://github.com/opensearch-project/OpenSearch/pull/11643)) - Mark fuzzy filter GA and remove experimental setting ([12631](https://github.com/opensearch-project/OpenSearch/pull/12631)) +- Keep the election scheduler open until cluster state has been applied ([#11699](https://github.com/opensearch-project/OpenSearch/pull/11699)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 5a07f964f94a4..3d74feddfa261 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -386,6 +386,7 @@ public void onFailure(String source, Exception e) { @Override public void onSuccess(String source) { + closePrevotingAndElectionScheduler(); applyListener.onResponse(null); } }); @@ -472,17 +473,29 @@ private static Optional joinWithDestination(Optional lastJoin, Disco } private void closePrevotingAndElectionScheduler() { + closePrevoting(); + closeElectionScheduler(); + } + + private void closePrevoting() { if (prevotingRound != null) { prevotingRound.close(); prevotingRound = null; } + } + private void closeElectionScheduler() { if (electionScheduler != null) { electionScheduler.close(); electionScheduler = null; } } + // package-visible for testing + boolean isElectionSchedulerRunning() { + return electionScheduler != null; + } + private void updateMaxTermSeen(final long term) { synchronized (mutex) { maxTermSeen = Math.max(maxTermSeen, term); @@ -724,7 +737,7 @@ void becomeLeader(String method) { lastKnownLeader = Optional.of(getLocalNode()); peerFinder.deactivate(getLocalNode()); clusterFormationFailureHelper.stop(); - closePrevotingAndElectionScheduler(); + closePrevoting(); preVoteCollector.update(getPreVoteResponse(), getLocalNode()); assert leaderChecker.leader() == null : leaderChecker.leader(); @@ -761,7 +774,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) { lastKnownLeader = Optional.of(leaderNode); peerFinder.deactivate(leaderNode); clusterFormationFailureHelper.stop(); - closePrevotingAndElectionScheduler(); + closePrevoting(); cancelActivePublication("become follower: " + method); preVoteCollector.update(getPreVoteResponse(), leaderNode); @@ -927,7 +940,6 @@ public void invariant() { assert lastKnownLeader.isPresent() && lastKnownLeader.get().equals(getLocalNode()); assert joinAccumulator instanceof JoinHelper.LeaderJoinAccumulator; assert peerFinderLeader.equals(lastKnownLeader) : peerFinderLeader; - assert electionScheduler == null : electionScheduler; assert prevotingRound == null : prevotingRound; assert becomingClusterManager || getStateForClusterManagerService().nodes().getClusterManagerNodeId() != null : getStateForClusterManagerService(); @@ -972,7 +984,6 @@ assert getLocalNode().equals(applierState.nodes().getClusterManagerNode()) assert lastKnownLeader.isPresent() && (lastKnownLeader.get().equals(getLocalNode()) == false); assert joinAccumulator instanceof JoinHelper.FollowerJoinAccumulator; assert peerFinderLeader.equals(lastKnownLeader) : peerFinderLeader; - assert electionScheduler == null : electionScheduler; assert prevotingRound == null : prevotingRound; assert getStateForClusterManagerService().nodes().getClusterManagerNodeId() == null : getStateForClusterManagerService(); assert leaderChecker.currentNodeIsClusterManager() == false; @@ -1693,6 +1704,7 @@ public void onSuccess(String source) { updateMaxTermSeen(getCurrentTerm()); if (mode == Mode.LEADER) { + closePrevotingAndElectionScheduler(); // if necessary, abdicate to another node or improve the voting configuration boolean attemptReconfiguration = true; final ClusterState state = getLastAcceptedState(); // committed state diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java index a3129655148ab..5eeebd2588416 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java @@ -270,7 +270,7 @@ public void testNodesJoinAfterStableCluster() { public void testExpandsConfigurationWhenGrowingFromOneNodeToThreeButDoesNotShrink() { try (Cluster cluster = new Cluster(1)) { cluster.runRandomly(); - cluster.stabilise(); + cluster.stabilise(DEFAULT_STABILISATION_TIME * 2); final ClusterNode leader = cluster.getAnyLeader(); @@ -1750,7 +1750,7 @@ public void testDoesNotPerformElectionWhenRestartingFollower() { public void testImproveConfigurationPerformsVotingConfigExclusionStateCheck() { try (Cluster cluster = new Cluster(1)) { cluster.runRandomly(); - cluster.stabilise(); + cluster.stabilise(DEFAULT_STABILISATION_TIME * 2); final Coordinator coordinator = cluster.getAnyLeader().coordinator; final ClusterState currentState = coordinator.getLastAcceptedState(); diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index 28d7706fb1493..0754cc1793dc8 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -47,6 +47,7 @@ import org.opensearch.cluster.OpenSearchAllocationTestCase; import org.opensearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration; +import org.opensearch.cluster.coordination.Coordinator.Mode; import org.opensearch.cluster.coordination.LinearizabilityChecker.History; import org.opensearch.cluster.coordination.LinearizabilityChecker.SequentialSpec; import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType; @@ -653,6 +654,12 @@ void stabilise(long stabilisationDurationMillis) { leader.getLastAppliedClusterState().getNodes().nodeExists(nodeId) ); } + if (clusterNode.coordinator.getMode() == Mode.LEADER || clusterNode.coordinator.getMode() == Mode.FOLLOWER) { + assertFalse( + "Election scheduler should stop after cluster has stabilised", + clusterNode.coordinator.isElectionSchedulerRunning() + ); + } } final Set connectedNodeIds = clusterNodes.stream()