From 82856f400283a9a11dfc325c865a37c1c153651c Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Wed, 10 Feb 2021 09:44:51 -0500 Subject: [PATCH 01/11] initial fix --- ..._multiple_version_protocol_feature_test.py | 338 ++++++++++-------- 1 file changed, 191 insertions(+), 147 deletions(-) diff --git a/tests/nodeos_multiple_version_protocol_feature_test.py b/tests/nodeos_multiple_version_protocol_feature_test.py index 18711d30148..302a3630b13 100755 --- a/tests/nodeos_multiple_version_protocol_feature_test.py +++ b/tests/nodeos_multiple_version_protocol_feature_test.py @@ -13,6 +13,11 @@ from os.path import join, exists from datetime import datetime +import multiprocessing +core_num = multiprocessing.cpu_count() + + +Utils.Print("### BEGIN multiversion test ###") ############################################################### # nodeos_multiple_version_protocol_feature_test # @@ -20,22 +25,6 @@ # ############################################################### -# Parse command line arguments -args = TestHelper.parse_args({"-v","--clean-run","--dump-error-details","--leave-running", - "--keep-logs", "--alternate-version-labels-file"}) -Utils.Debug=args.v -killAll=args.clean_run -dumpErrorDetails=args.dump_error_details -dontKill=args.leave_running -killEosInstances=not dontKill -killWallet=not dontKill -keepLogs=args.keep_logs -alternateVersionLabelsFile=args.alternate_version_labels_file - -walletMgr=WalletMgr(True) -cluster=Cluster(walletd=True) -cluster.setWalletMgr(walletMgr) - def restartNode(node: Node, chainArg=None, addSwapFlags=None, nodeosPath=None): if not node.killed: node.kill(signal.SIGTERM) @@ -45,9 +34,14 @@ def restartNode(node: Node, chainArg=None, addSwapFlags=None, nodeosPath=None): def shouldNodeContainPreactivateFeature(node): preactivateFeatureDigest = node.getSupportedProtocolFeatureDict()["PREACTIVATE_FEATURE"]["feature_digest"] - assert preactivateFeatureDigest + assert preactivateFeatureDigest, "preactivateFeatureDigest should not be empty" + Utils.Print("preactivateFeatureDigest: {}".format(preactivateFeatureDigest)) blockHeaderState = node.getLatestBlockHeaderState() + assert blockHeaderState, "blockHeaderState should not be empty" activatedProtocolFeatures = blockHeaderState["activated_protocol_features"]["protocol_features"] + Utils.Print("activatedProtocolFeatures size: {}".format(len(activatedProtocolFeatures))) + for f in activatedProtocolFeatures: + Utils.Print("activatedProtocolFeature: {}".format(f)) return preactivateFeatureDigest in activatedProtocolFeatures def waitUntilBeginningOfProdTurn(node, producerName, timeout=30, sleepTime=0.4): @@ -77,133 +71,183 @@ def hasBlockBecomeIrr(): return node.getIrreversibleBlockNum() >= blockNum return Utils.waitForTruth(hasBlockBecomeIrr, timeout) -# List to contain the test result message -testSuccessful = False -try: - TestHelper.printSystemInfo("BEGIN") - cluster.killall(allInstances=killAll) - cluster.cleanup() - - # Create a cluster of 4 nodes, each node has 1 producer. The first 3 nodes use the latest vesion, - # While the 4th node use the version that doesn't support protocol feature activation (i.e. 
1.7.0) - associatedNodeLabels = { - "3": "170" - } - Utils.Print("Alternate Version Labels File is {}".format(alternateVersionLabelsFile)) - assert exists(alternateVersionLabelsFile), "Alternate version labels file does not exist" - # version 1.7 did not provide a default value for "--last-block-time-offset-us" so this is needed to - # avoid dropping late blocks - assert cluster.launch(pnodes=4, totalNodes=4, prodCount=1, totalProducers=4, - extraNodeosArgs=" --plugin eosio::producer_api_plugin ", - useBiosBootFile=False, - specificExtraNodeosArgs={ - 0:"--http-max-response-time-ms 990000", - 1:"--http-max-response-time-ms 990000", - 2:"--http-max-response-time-ms 990000", - 3:"--last-block-time-offset-us -200000"}, - onlySetProds=True, - pfSetupPolicy=PFSetupPolicy.NONE, - alternateVersionLabelsFile=alternateVersionLabelsFile, - associatedNodeLabels=associatedNodeLabels), "Unable to launch cluster" - - newNodeIds = [0, 1, 2] - oldNodeId = 3 - newNodes = list(map(lambda id: cluster.getNode(id), newNodeIds)) - oldNode = cluster.getNode(oldNodeId) - allNodes = [*newNodes, oldNode] - - def pauseBlockProductions(): - for node in allNodes: - if not node.killed: node.processCurlCmd("producer", "pause", "") - - def resumeBlockProductions(): - for node in allNodes: - if not node.killed: node.processCurlCmd("producer", "resume", "") - - def areNodesInSync(nodes:[Node]): - # Pause all block production to ensure the head is not moving - pauseBlockProductions() - time.sleep(2) # Wait for some time to ensure all blocks are propagated - headBlockIds = [] - for node in nodes: - headBlockId = node.getInfo()["head_block_id"] - headBlockIds.append(headBlockId) - resumeBlockProductions() - return len(set(headBlockIds)) == 1 - - # Before everything starts, all nodes (new version and old version) should be in sync - assert areNodesInSync(allNodes), "Nodes are not in sync before preactivation" - - # First, we are going to test the case where: - # - 1st node has valid earliest_allowed_activation_time - # - While 2nd and 3rd node have invalid earliest_allowed_activation_time - # Producer in the 1st node is going to activate PREACTIVATE_FEATURE during his turn - # Immediately, in the next block PREACTIVATE_FEATURE should be active in 1st node, but not on 2nd and 3rd - # Therefore, 1st node will be out of sync with 2nd, 3rd, and 4th node - # After a round has passed though, 1st node will realize he's in minority fork and then join the other nodes - # Hence, the PREACTIVATE_FEATURE that was previously activated will be dropped and all of the nodes should be in sync - setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", False) - setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", False) - - waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") - newNodes[0].activatePreactivateFeature() - assert shouldNodeContainPreactivateFeature(newNodes[0]), "1st node should contain PREACTIVATE FEATURE" - assert not (shouldNodeContainPreactivateFeature(newNodes[1]) or shouldNodeContainPreactivateFeature(newNodes[2])), \ - "2nd and 3rd node should not contain PREACTIVATE FEATURE" - assert areNodesInSync([newNodes[1], newNodes[2], oldNode]), "2nd, 3rd and 4th node should be in sync" - assert not areNodesInSync(allNodes), "1st node should be out of sync with the rest nodes" - - waitForOneRound() - - assert not shouldNodeContainPreactivateFeature(newNodes[0]), "PREACTIVATE_FEATURE should be dropped" - assert areNodesInSync(allNodes), "All nodes should be in sync" - - # Then we set the 
earliest_allowed_activation_time of 2nd node and 3rd node with valid value - # Once the 1st node activate PREACTIVATE_FEATURE, all of them should have PREACTIVATE_FEATURE activated in the next block - # They will be in sync and their LIB will advance since they control > 2/3 of the producers - # Also the LIB should be able to advance past the block that contains PREACTIVATE_FEATURE - # However, the 4th node will be out of sync with them, and its LIB will stuck - setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", True) - setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", True) - - waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") - libBeforePreactivation = newNodes[0].getIrreversibleBlockNum() - newNodes[0].activatePreactivateFeature() - - assert areNodesInSync(newNodes), "New nodes should be in sync" - assert not areNodesInSync(allNodes), "Nodes should not be in sync after preactivation" - for node in newNodes: assert shouldNodeContainPreactivateFeature(node), "New node should contain PREACTIVATE_FEATURE" - - activatedBlockNum = newNodes[0].getHeadBlockNum() # The PREACTIVATE_FEATURE should have been activated before or at this block num - assert waitUntilBlockBecomeIrr(newNodes[0], activatedBlockNum), \ - "1st node LIB should be able to advance past the block that contains PREACTIVATE_FEATURE" - assert newNodes[1].getIrreversibleBlockNum() >= activatedBlockNum and \ - newNodes[2].getIrreversibleBlockNum() >= activatedBlockNum, \ - "2nd and 3rd node LIB should also be able to advance past the block that contains PREACTIVATE_FEATURE" - assert oldNode.getIrreversibleBlockNum() <= libBeforePreactivation, \ - "4th node LIB should stuck on LIB before PREACTIVATE_FEATURE is activated" - - # Restart old node with newest version - # Before we are migrating to new version, use --export-reversible-blocks as the old version - # and --import-reversible-blocks with the new version to ensure the compatibility of the reversible blocks - # Finally, when we restart the 4th node with the version of nodeos that supports protocol feature, - # all nodes should be in sync, and the 4th node will also contain PREACTIVATE_FEATURE - portableRevBlkPath = os.path.join(Utils.getNodeDataDir(oldNodeId), "rev_blk_portable_format") - oldNode.kill(signal.SIGTERM) - # Note, for the following relaunch, these will fail to relaunch immediately (expected behavior of export/import), so the chainArg will not replace the old cmd - oldNode.relaunch(chainArg="--export-reversible-blocks {}".format(portableRevBlkPath), timeout=1) - oldNode.relaunch(chainArg="--import-reversible-blocks {}".format(portableRevBlkPath), timeout=1, nodeosPath="programs/nodeos/nodeos") - os.remove(portableRevBlkPath) - - restartNode(oldNode, chainArg="--replay", nodeosPath="programs/nodeos/nodeos") - time.sleep(2) # Give some time to replay - - assert areNodesInSync(allNodes), "All nodes should be in sync" - assert shouldNodeContainPreactivateFeature(oldNode), "4th node should contain PREACTIVATE_FEATURE" - - testSuccessful = True -finally: - TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails) - -exitCode = 0 if testSuccessful else 1 -exit(exitCode) +def pauseBlockProductions(allNodes): + for node in allNodes: + if not node.killed: + Utils.Print("** before node pause, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + node.processCurlCmd("producer", "pause", "") + Utils.Print("** after node pause, hbi 
{}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + time.sleep(2) # Wait for some time to ensure all blocks are propagated + + +def resumeBlockProductions(allNodes): + for node in allNodes: + if not node.killed: + Utils.Print("** before node resume, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + node.processCurlCmd("producer", "resume", "") + Utils.Print("** after node resume, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + +def areNodesInSync(nodes:[Node]): + Utils.Print("*** CHECK areNodesInSync") + # Pause all block production to ensure the head is not moving + #pauseBlockProductions() + #time.sleep(2) # Wait for some time to ensure all blocks are propagated + headBlockIds = [] + for node in nodes: + headBlockId = node.getInfo()["head_block_id"] + headBlockIds.append(headBlockId) + #resumeBlockProductions() + for hbi in headBlockIds: + Utils.Print("* headbBockId: {} *".format(hbi)) + return len(set(headBlockIds)) == 1 + +def main(): + # Parse command line arguments + args = TestHelper.parse_args({"-v","--clean-run","--dump-error-details","--leave-running", + "--keep-logs", "--alternate-version-labels-file"}) + Utils.Debug=args.v + killAll=args.clean_run + dumpErrorDetails=args.dump_error_details + dontKill=args.leave_running + killEosInstances=not dontKill + killWallet=not dontKill + keepLogs=args.keep_logs + alternateVersionLabelsFile=args.alternate_version_labels_file + + walletMgr=WalletMgr(True) + cluster=Cluster(walletd=True) + cluster.setWalletMgr(walletMgr) + + # List to contain the test result message + testSuccessful = False + try: + TestHelper.printSystemInfo("BEGIN") + cluster.killall(allInstances=killAll) + cluster.cleanup() + + # Create a cluster of 4 nodes, each node has 1 producer. The first 3 nodes use the latest vesion, + # While the 4th node use the version that doesn't support protocol feature activation (i.e. 
1.7.0) + associatedNodeLabels = { + "3": "170" + } + Utils.Print("Alternate Version Labels File is {}".format(alternateVersionLabelsFile)) + assert exists(alternateVersionLabelsFile), "Alternate version labels file does not exist" + # version 1.7 did not provide a default value for "--last-block-time-offset-us" so this is needed to + # avoid dropping late blocks + assert cluster.launch(pnodes=4, totalNodes=4, prodCount=1, totalProducers=4, + extraNodeosArgs=" --plugin eosio::producer_api_plugin ", + useBiosBootFile=False, + specificExtraNodeosArgs={ + 0:"--http-max-response-time-ms 990000", + 1:"--http-max-response-time-ms 990000", + 2:"--http-max-response-time-ms 990000", + 3:"--last-block-time-offset-us -200000"}, + onlySetProds=True, + pfSetupPolicy=PFSetupPolicy.NONE, + alternateVersionLabelsFile=alternateVersionLabelsFile, + associatedNodeLabels=associatedNodeLabels), "Unable to launch cluster" + + newNodeIds = [0, 1, 2] + oldNodeId = 3 + newNodes = list(map(lambda id: cluster.getNode(id), newNodeIds)) + oldNode = cluster.getNode(oldNodeId) + allNodes = [*newNodes, oldNode] + + Utils.Print("+++ Nodes are in sync before preactivation +++") + # Before everything starts, all nodes (new version and old version) should be in sync + pauseBlockProductions(allNodes) + assert areNodesInSync(allNodes), "Nodes are not in sync before preactivation" + resumeBlockProductions(allNodes) + + # First, we are going to test the case where: + # - 1st node has valid earliest_allowed_activation_time + # - While 2nd and 3rd node have invalid earliest_allowed_activation_time + # Producer in the 1st node is going to activate PREACTIVATE_FEATURE during his turn + # Immediately, in the next block PREACTIVATE_FEATURE should be active in 1st node, but not on 2nd and 3rd + # Therefore, 1st node will be out of sync with 2nd, 3rd, and 4th node + # After a round has passed though, 1st node will realize he's in minority fork and then join the other nodes + # Hence, the PREACTIVATE_FEATURE that was previously activated will be dropped and all of the nodes should be in sync + + Utils.Print("+++ 1st Node should contain PREACTIVATE FEATURE +++") + setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", False) + setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", False) + + waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") + newNodes[0].activatePreactivateFeature() + assert shouldNodeContainPreactivateFeature(newNodes[0]), "1st node should contain PREACTIVATE FEATURE" + assert not (shouldNodeContainPreactivateFeature(newNodes[1]) or shouldNodeContainPreactivateFeature(newNodes[2])), \ + "2nd and 3rd node should not contain PREACTIVATE FEATURE" + Utils.Print("+++ 2nd, 3rd and 4th node should be in sync +++") + pauseBlockProductions(allNodes) + assert areNodesInSync([newNodes[1], newNodes[2], oldNode]), "2nd, 3rd and 4th node should be in sync" + Utils.Print("+++ 1st node should be out of sync with the rest nodes +++") + assert not areNodesInSync(allNodes), "1st node should be out of sync with the rest nodes" + resumeBlockProductions(allNodes) + + waitForOneRound() + + assert not shouldNodeContainPreactivateFeature(newNodes[0]), "PREACTIVATE_FEATURE should be dropped" + + pauseBlockProductions(allNodes) + assert areNodesInSync(allNodes), "All nodes should be in sync" + resumeBlockProductions(allNodes) + + # Then we set the earliest_allowed_activation_time of 2nd node and 3rd node with valid value + # Once the 1st node activate PREACTIVATE_FEATURE, all of them should have 
PREACTIVATE_FEATURE activated in the next block + # They will be in sync and their LIB will advance since they control > 2/3 of the producers + # Also the LIB should be able to advance past the block that contains PREACTIVATE_FEATURE + # However, the 4th node will be out of sync with them, and its LIB will stuck + setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", True) + setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", True) + + waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") + libBeforePreactivation = newNodes[0].getIrreversibleBlockNum() + newNodes[0].activatePreactivateFeature() + + pauseBlockProductions(allNodes) + assert areNodesInSync(newNodes), "New nodes should be in sync" + assert not areNodesInSync(allNodes), "Nodes should not be in sync after preactivation" + resumeBlockProductions(allNodes) + + for node in newNodes: assert shouldNodeContainPreactivateFeature(node), "New node should contain PREACTIVATE_FEATURE" + + activatedBlockNum = newNodes[0].getHeadBlockNum() # The PREACTIVATE_FEATURE should have been activated before or at this block num + assert waitUntilBlockBecomeIrr(newNodes[0], activatedBlockNum), \ + "1st node LIB should be able to advance past the block that contains PREACTIVATE_FEATURE" + assert newNodes[1].getIrreversibleBlockNum() >= activatedBlockNum and \ + newNodes[2].getIrreversibleBlockNum() >= activatedBlockNum, \ + "2nd and 3rd node LIB should also be able to advance past the block that contains PREACTIVATE_FEATURE" + assert oldNode.getIrreversibleBlockNum() <= libBeforePreactivation, \ + "4th node LIB should stuck on LIB before PREACTIVATE_FEATURE is activated" + + # Restart old node with newest version + # Before we are migrating to new version, use --export-reversible-blocks as the old version + # and --import-reversible-blocks with the new version to ensure the compatibility of the reversible blocks + # Finally, when we restart the 4th node with the version of nodeos that supports protocol feature, + # all nodes should be in sync, and the 4th node will also contain PREACTIVATE_FEATURE + portableRevBlkPath = os.path.join(Utils.getNodeDataDir(oldNodeId), "rev_blk_portable_format") + oldNode.kill(signal.SIGTERM) + # Note, for the following relaunch, these will fail to relaunch immediately (expected behavior of export/import), so the chainArg will not replace the old cmd + oldNode.relaunch(chainArg="--export-reversible-blocks {}".format(portableRevBlkPath), timeout=1) + oldNode.relaunch(chainArg="--import-reversible-blocks {}".format(portableRevBlkPath), timeout=1, nodeosPath="programs/nodeos/nodeos") + os.remove(portableRevBlkPath) + + restartNode(oldNode, chainArg="--replay", nodeosPath="programs/nodeos/nodeos") + time.sleep(2) # Give some time to replay + + pauseBlockProductions(allNodes) + assert areNodesInSync(allNodes), "All nodes should be in sync" + resumeBlockProductions(allNodes) + assert shouldNodeContainPreactivateFeature(oldNode), "4th node should contain PREACTIVATE_FEATURE" + + testSuccessful = True + finally: + TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails) + + Utils.Print("### END multiversion test ###") + + exitCode = 0 if testSuccessful else 1 + exit(exitCode) + +if __name__ == "__main__": + main() From ec768bb9a53f5096fa79308eea9f2b3f351ed005 Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Wed, 10 Feb 2021 13:34:21 -0500 Subject: [PATCH 02/11] test ROUNDS for buildkite --- .cicd/generate-pipeline.sh | 33 
++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/.cicd/generate-pipeline.sh b/.cicd/generate-pipeline.sh index 47a710f2b5a..5d80d9c204c 100755 --- a/.cicd/generate-pipeline.sh +++ b/.cicd/generate-pipeline.sh @@ -418,28 +418,27 @@ EOF IFS=$nIFS done IFS=$oIFS + if [[ ! "$PINNED" == 'false' || "$SKIP_MULTIVERSION_TEST" == 'false' ]]; then + cat < Date: Wed, 10 Feb 2021 13:51:17 -0500 Subject: [PATCH 03/11] fix cicd script format for testing --- .cicd/generate-pipeline.sh | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/.cicd/generate-pipeline.sh b/.cicd/generate-pipeline.sh index 5d80d9c204c..500d3616518 100755 --- a/.cicd/generate-pipeline.sh +++ b/.cicd/generate-pipeline.sh @@ -420,18 +420,19 @@ EOF IFS=$oIFS if [[ ! "$PINNED" == 'false' || "$SKIP_MULTIVERSION_TEST" == 'false' ]]; then cat < Date: Wed, 10 Feb 2021 14:14:43 -0500 Subject: [PATCH 04/11] fix eof indent for testing --- .cicd/generate-pipeline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cicd/generate-pipeline.sh b/.cicd/generate-pipeline.sh index 500d3616518..df28b16d317 100755 --- a/.cicd/generate-pipeline.sh +++ b/.cicd/generate-pipeline.sh @@ -432,7 +432,7 @@ EOF timeout: ${TIMEOUT:-30} skip: ${SKIP_LINUX}${SKIP_UBUNTU_18_04}${SKIP_MULTIVERSION_TEST} - EOF +EOF fi if [[ "$ROUND" != "$ROUNDS" ]]; then echo ' - wait' From b3c6fcd58fb6abf4ab4dac554d4386c9f9b78242 Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Thu, 11 Feb 2021 15:27:45 -0500 Subject: [PATCH 05/11] fix the checking on preactivate feature and nodes sync --- ..._multiple_version_protocol_feature_test.py | 385 +++++++++--------- 1 file changed, 198 insertions(+), 187 deletions(-) diff --git a/tests/nodeos_multiple_version_protocol_feature_test.py b/tests/nodeos_multiple_version_protocol_feature_test.py index 302a3630b13..7b639ef8e4e 100755 --- a/tests/nodeos_multiple_version_protocol_feature_test.py +++ b/tests/nodeos_multiple_version_protocol_feature_test.py @@ -13,10 +13,6 @@ from os.path import join, exists from datetime import datetime -import multiprocessing -core_num = multiprocessing.cpu_count() - - Utils.Print("### BEGIN multiversion test ###") ############################################################### # nodeos_multiple_version_protocol_feature_test @@ -25,6 +21,22 @@ # ############################################################### +# Parse command line arguments +args = TestHelper.parse_args({"-v","--clean-run","--dump-error-details","--leave-running", + "--keep-logs", "--alternate-version-labels-file"}) +Utils.Debug=args.v +killAll=args.clean_run +dumpErrorDetails=args.dump_error_details +dontKill=args.leave_running +killEosInstances=not dontKill +killWallet=not dontKill +keepLogs=args.keep_logs +alternateVersionLabelsFile=args.alternate_version_labels_file + +walletMgr=WalletMgr(True) +cluster=Cluster(walletd=True) +cluster.setWalletMgr(walletMgr) + def restartNode(node: Node, chainArg=None, addSwapFlags=None, nodeosPath=None): if not node.killed: node.kill(signal.SIGTERM) @@ -44,13 +56,17 @@ def shouldNodeContainPreactivateFeature(node): Utils.Print("activatedProtocolFeature: {}".format(f)) return preactivateFeatureDigest in activatedProtocolFeatures +waitUntilBeginningOfProdTurn_head = 0 def waitUntilBeginningOfProdTurn(node, producerName, timeout=30, sleepTime=0.4): def isDesiredProdTurn(): - headBlockNum = node.getHeadBlockNum() - res = node.getBlock(headBlockNum)["producer"] == producerName and \ - 
node.getBlock(headBlockNum-1)["producer"] != producerName + #headBlockNum = node.getHeadBlockNum() + waitUntilBeginningOfProdTurn_head = node.getHeadBlockNum() + res = node.getBlock(waitUntilBeginningOfProdTurn_head)["producer"] == producerName and \ + node.getBlock(waitUntilBeginningOfProdTurn_head-1)["producer"] != producerName return res - Utils.waitForTruth(isDesiredProdTurn, timeout, sleepTime) + #Utils.waitForTruth(isDesiredProdTurn, timeout, sleepTime) + ret = Utils.waitForTruth(isDesiredProdTurn, timeout, sleepTime) + assert ret != None, "Expected producer to arrive within 19 seconds (3 other producers)" def waitForOneRound(): time.sleep(24) # We have 4 producers for this test @@ -71,183 +87,178 @@ def hasBlockBecomeIrr(): return node.getIrreversibleBlockNum() >= blockNum return Utils.waitForTruth(hasBlockBecomeIrr, timeout) -def pauseBlockProductions(allNodes): - for node in allNodes: - if not node.killed: - Utils.Print("** before node pause, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) - node.processCurlCmd("producer", "pause", "") - Utils.Print("** after node pause, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) - time.sleep(2) # Wait for some time to ensure all blocks are propagated - - -def resumeBlockProductions(allNodes): - for node in allNodes: - if not node.killed: - Utils.Print("** before node resume, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) - node.processCurlCmd("producer", "resume", "") - Utils.Print("** after node resume, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) - -def areNodesInSync(nodes:[Node]): - Utils.Print("*** CHECK areNodesInSync") - # Pause all block production to ensure the head is not moving - #pauseBlockProductions() - #time.sleep(2) # Wait for some time to ensure all blocks are propagated - headBlockIds = [] - for node in nodes: - headBlockId = node.getInfo()["head_block_id"] - headBlockIds.append(headBlockId) - #resumeBlockProductions() - for hbi in headBlockIds: - Utils.Print("* headbBockId: {} *".format(hbi)) - return len(set(headBlockIds)) == 1 - -def main(): - # Parse command line arguments - args = TestHelper.parse_args({"-v","--clean-run","--dump-error-details","--leave-running", - "--keep-logs", "--alternate-version-labels-file"}) - Utils.Debug=args.v - killAll=args.clean_run - dumpErrorDetails=args.dump_error_details - dontKill=args.leave_running - killEosInstances=not dontKill - killWallet=not dontKill - keepLogs=args.keep_logs - alternateVersionLabelsFile=args.alternate_version_labels_file - - walletMgr=WalletMgr(True) - cluster=Cluster(walletd=True) - cluster.setWalletMgr(walletMgr) - - # List to contain the test result message - testSuccessful = False - try: - TestHelper.printSystemInfo("BEGIN") - cluster.killall(allInstances=killAll) - cluster.cleanup() - - # Create a cluster of 4 nodes, each node has 1 producer. The first 3 nodes use the latest vesion, - # While the 4th node use the version that doesn't support protocol feature activation (i.e. 
1.7.0) - associatedNodeLabels = { - "3": "170" - } - Utils.Print("Alternate Version Labels File is {}".format(alternateVersionLabelsFile)) - assert exists(alternateVersionLabelsFile), "Alternate version labels file does not exist" - # version 1.7 did not provide a default value for "--last-block-time-offset-us" so this is needed to - # avoid dropping late blocks - assert cluster.launch(pnodes=4, totalNodes=4, prodCount=1, totalProducers=4, - extraNodeosArgs=" --plugin eosio::producer_api_plugin ", - useBiosBootFile=False, - specificExtraNodeosArgs={ - 0:"--http-max-response-time-ms 990000", - 1:"--http-max-response-time-ms 990000", - 2:"--http-max-response-time-ms 990000", - 3:"--last-block-time-offset-us -200000"}, - onlySetProds=True, - pfSetupPolicy=PFSetupPolicy.NONE, - alternateVersionLabelsFile=alternateVersionLabelsFile, - associatedNodeLabels=associatedNodeLabels), "Unable to launch cluster" - - newNodeIds = [0, 1, 2] - oldNodeId = 3 - newNodes = list(map(lambda id: cluster.getNode(id), newNodeIds)) - oldNode = cluster.getNode(oldNodeId) - allNodes = [*newNodes, oldNode] - - Utils.Print("+++ Nodes are in sync before preactivation +++") - # Before everything starts, all nodes (new version and old version) should be in sync - pauseBlockProductions(allNodes) - assert areNodesInSync(allNodes), "Nodes are not in sync before preactivation" - resumeBlockProductions(allNodes) - - # First, we are going to test the case where: - # - 1st node has valid earliest_allowed_activation_time - # - While 2nd and 3rd node have invalid earliest_allowed_activation_time - # Producer in the 1st node is going to activate PREACTIVATE_FEATURE during his turn - # Immediately, in the next block PREACTIVATE_FEATURE should be active in 1st node, but not on 2nd and 3rd - # Therefore, 1st node will be out of sync with 2nd, 3rd, and 4th node - # After a round has passed though, 1st node will realize he's in minority fork and then join the other nodes - # Hence, the PREACTIVATE_FEATURE that was previously activated will be dropped and all of the nodes should be in sync - - Utils.Print("+++ 1st Node should contain PREACTIVATE FEATURE +++") - setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", False) - setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", False) - - waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") - newNodes[0].activatePreactivateFeature() - assert shouldNodeContainPreactivateFeature(newNodes[0]), "1st node should contain PREACTIVATE FEATURE" - assert not (shouldNodeContainPreactivateFeature(newNodes[1]) or shouldNodeContainPreactivateFeature(newNodes[2])), \ - "2nd and 3rd node should not contain PREACTIVATE FEATURE" - Utils.Print("+++ 2nd, 3rd and 4th node should be in sync +++") - pauseBlockProductions(allNodes) - assert areNodesInSync([newNodes[1], newNodes[2], oldNode]), "2nd, 3rd and 4th node should be in sync" - Utils.Print("+++ 1st node should be out of sync with the rest nodes +++") - assert not areNodesInSync(allNodes), "1st node should be out of sync with the rest nodes" - resumeBlockProductions(allNodes) - - waitForOneRound() - - assert not shouldNodeContainPreactivateFeature(newNodes[0]), "PREACTIVATE_FEATURE should be dropped" - - pauseBlockProductions(allNodes) - assert areNodesInSync(allNodes), "All nodes should be in sync" - resumeBlockProductions(allNodes) - - # Then we set the earliest_allowed_activation_time of 2nd node and 3rd node with valid value - # Once the 1st node activate PREACTIVATE_FEATURE, all of them should have 
PREACTIVATE_FEATURE activated in the next block - # They will be in sync and their LIB will advance since they control > 2/3 of the producers - # Also the LIB should be able to advance past the block that contains PREACTIVATE_FEATURE - # However, the 4th node will be out of sync with them, and its LIB will stuck - setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", True) - setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", True) - - waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") - libBeforePreactivation = newNodes[0].getIrreversibleBlockNum() +# List to contain the test result message +testSuccessful = False +try: + TestHelper.printSystemInfo("BEGIN") + cluster.killall(allInstances=killAll) + cluster.cleanup() + + # Create a cluster of 4 nodes, each node has 1 producer. The first 3 nodes use the latest vesion, + # While the 4th node use the version that doesn't support protocol feature activation (i.e. 1.7.0) + associatedNodeLabels = { + "3": "170" + } + Utils.Print("Alternate Version Labels File is {}".format(alternateVersionLabelsFile)) + assert exists(alternateVersionLabelsFile), "Alternate version labels file does not exist" + # version 1.7 did not provide a default value for "--last-block-time-offset-us" so this is needed to + # avoid dropping late blocks + assert cluster.launch(pnodes=4, totalNodes=4, prodCount=1, totalProducers=4, + extraNodeosArgs=" --plugin eosio::producer_api_plugin ", + useBiosBootFile=False, + specificExtraNodeosArgs={ + 0:"--http-max-response-time-ms 990000", + 1:"--http-max-response-time-ms 990000", + 2:"--http-max-response-time-ms 990000", + 3:"--last-block-time-offset-us -200000"}, + onlySetProds=True, + pfSetupPolicy=PFSetupPolicy.NONE, + alternateVersionLabelsFile=alternateVersionLabelsFile, + associatedNodeLabels=associatedNodeLabels), "Unable to launch cluster" + + newNodeIds = [0, 1, 2] + oldNodeId = 3 + newNodes = list(map(lambda id: cluster.getNode(id), newNodeIds)) + oldNode = cluster.getNode(oldNodeId) + allNodes = [*newNodes, oldNode] + + def pauseBlockProductions(): + for node in allNodes: + if not node.killed: + Utils.Print("** before node pause, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + node.processCurlCmd("producer", "pause", "") + Utils.Print("** after node pause, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + + def resumeBlockProductions(): + for node in allNodes: + if not node.killed: + Utils.Print("** before node resume, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + node.processCurlCmd("producer", "resume", "") + Utils.Print("** after node resume, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + + def areNodesInSync(nodes:[Node], pauseAll=True, resumeAll=True): + Utils.Print("*** CHECK areNodesInSync") + # Pause all block production to ensure the head is not moving + if pauseAll: + pauseBlockProductions() + time.sleep(2) # Wait for some time to ensure all blocks are propagated + + # Get all current head block IDs for each producer + #headBlockIds = [] + headBlockNums = [] + for node in nodes: + #headBlockId = node.getInfo()["head_block_id"] + #headBlockIds.append(headBlockId) + #headBlockDict[node.nodeId] = node.getInfo()["head_block_num"] + headBlockNums.append(node.getInfo()["head_block_num"]) + + # for hbi in headBlockIds: + # Utils.Print("* headbBockId: {} *".format(hbi)) + for hbn in headBlockNums: + Utils.Print("* 
headbBockNum: {} *".format(hbn)) + + if resumeAll: + resumeBlockProductions() + + # Wait 1 second, then check if all nodes have previous head blocks by other producers + if len(set(headBlockNums)) != 1: + time.sleep(1) + for node in nodes: + for hbn in set(headBlockNums): + if not node.getBlock(hbn): + Utils.Print("node {} should contain block {}".format(node.nodeId, hbn)) + return False + + #return len(set(headBlockIds)) == 1 + return True + + Utils.Print("+++ Nodes are in sync before preactivation +++") + # Before everything starts, all nodes (new version and old version) should be in sync + assert areNodesInSync(allNodes), "Nodes are not in sync before preactivation" + + # First, we are going to test the case where: + # - 1st node has valid earliest_allowed_activation_time + # - While 2nd and 3rd node have invalid earliest_allowed_activation_time + # Producer in the 1st node is going to activate PREACTIVATE_FEATURE during his turn + # Immediately, in the next block PREACTIVATE_FEATURE should be active in 1st node, but not on 2nd and 3rd + # Therefore, 1st node will be out of sync with 2nd, 3rd, and 4th node + # After a round has passed though, 1st node will realize he's in minority fork and then join the other nodes + # Hence, the PREACTIVATE_FEATURE that was previously activated will be dropped and all of the nodes should be in sync + + Utils.Print("+++ 1st Node should contain PREACTIVATE FEATURE +++") + setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", False) + setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", False) + + waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") + #newNodes[0].activatePreactivateFeature() + for i in range(3): newNodes[0].activatePreactivateFeature() - - pauseBlockProductions(allNodes) - assert areNodesInSync(newNodes), "New nodes should be in sync" - assert not areNodesInSync(allNodes), "Nodes should not be in sync after preactivation" - resumeBlockProductions(allNodes) - - for node in newNodes: assert shouldNodeContainPreactivateFeature(node), "New node should contain PREACTIVATE_FEATURE" - - activatedBlockNum = newNodes[0].getHeadBlockNum() # The PREACTIVATE_FEATURE should have been activated before or at this block num - assert waitUntilBlockBecomeIrr(newNodes[0], activatedBlockNum), \ - "1st node LIB should be able to advance past the block that contains PREACTIVATE_FEATURE" - assert newNodes[1].getIrreversibleBlockNum() >= activatedBlockNum and \ - newNodes[2].getIrreversibleBlockNum() >= activatedBlockNum, \ - "2nd and 3rd node LIB should also be able to advance past the block that contains PREACTIVATE_FEATURE" - assert oldNode.getIrreversibleBlockNum() <= libBeforePreactivation, \ - "4th node LIB should stuck on LIB before PREACTIVATE_FEATURE is activated" - - # Restart old node with newest version - # Before we are migrating to new version, use --export-reversible-blocks as the old version - # and --import-reversible-blocks with the new version to ensure the compatibility of the reversible blocks - # Finally, when we restart the 4th node with the version of nodeos that supports protocol feature, - # all nodes should be in sync, and the 4th node will also contain PREACTIVATE_FEATURE - portableRevBlkPath = os.path.join(Utils.getNodeDataDir(oldNodeId), "rev_blk_portable_format") - oldNode.kill(signal.SIGTERM) - # Note, for the following relaunch, these will fail to relaunch immediately (expected behavior of export/import), so the chainArg will not replace the old cmd - 
oldNode.relaunch(chainArg="--export-reversible-blocks {}".format(portableRevBlkPath), timeout=1) - oldNode.relaunch(chainArg="--import-reversible-blocks {}".format(portableRevBlkPath), timeout=1, nodeosPath="programs/nodeos/nodeos") - os.remove(portableRevBlkPath) - - restartNode(oldNode, chainArg="--replay", nodeosPath="programs/nodeos/nodeos") - time.sleep(2) # Give some time to replay - - pauseBlockProductions(allNodes) - assert areNodesInSync(allNodes), "All nodes should be in sync" - resumeBlockProductions(allNodes) - assert shouldNodeContainPreactivateFeature(oldNode), "4th node should contain PREACTIVATE_FEATURE" - - testSuccessful = True - finally: - TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails) - - Utils.Print("### END multiversion test ###") - - exitCode = 0 if testSuccessful else 1 - exit(exitCode) - -if __name__ == "__main__": - main() + if shouldNodeContainPreactivateFeature(newNodes[0]): + break + diff =newNodes[0].getInfo()["head_block_num"] - waitUntilBeginningOfProdTurn_head + assert diff >= 12, "1st node should contain PREACTIVATE FEATURE since we set it during its production window" + + assert shouldNodeContainPreactivateFeature(newNodes[0]), "1st node should contain PREACTIVATE FEATURE" + assert not (shouldNodeContainPreactivateFeature(newNodes[1]) or shouldNodeContainPreactivateFeature(newNodes[2])), \ + "2nd and 3rd node should not contain PREACTIVATE FEATURE" + Utils.Print("+++ 2nd, 3rd and 4th node should be in sync +++") + assert areNodesInSync([newNodes[1], newNodes[2], oldNode], resumeAll=False), "2nd, 3rd and 4th node should be in sync" + Utils.Print("+++ 1st node should be out of sync with the rest nodes +++") + assert not areNodesInSync(allNodes, pauseAll=False), "+++ 1st node should be out of sync with the rest nodes +++" + + waitForOneRound() + + assert not shouldNodeContainPreactivateFeature(newNodes[0]), "PREACTIVATE_FEATURE should be dropped" + assert areNodesInSync(allNodes), "All nodes should be in sync" + + # Then we set the earliest_allowed_activation_time of 2nd node and 3rd node with valid value + # Once the 1st node activate PREACTIVATE_FEATURE, all of them should have PREACTIVATE_FEATURE activated in the next block + # They will be in sync and their LIB will advance since they control > 2/3 of the producers + # Also the LIB should be able to advance past the block that contains PREACTIVATE_FEATURE + # However, the 4th node will be out of sync with them, and its LIB will stuck + setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", True) + setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", True) + + waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") + libBeforePreactivation = newNodes[0].getIrreversibleBlockNum() + newNodes[0].activatePreactivateFeature() + + assert areNodesInSync(newNodes), "New nodes should be in sync" + assert not areNodesInSync(allNodes), "Nodes should not be in sync after preactivation" + for node in newNodes: assert shouldNodeContainPreactivateFeature(node), "New node should contain PREACTIVATE_FEATURE" + + activatedBlockNum = newNodes[0].getHeadBlockNum() # The PREACTIVATE_FEATURE should have been activated before or at this block num + assert waitUntilBlockBecomeIrr(newNodes[0], activatedBlockNum), \ + "1st node LIB should be able to advance past the block that contains PREACTIVATE_FEATURE" + assert newNodes[1].getIrreversibleBlockNum() >= activatedBlockNum and \ + newNodes[2].getIrreversibleBlockNum() 
>= activatedBlockNum, \ + "2nd and 3rd node LIB should also be able to advance past the block that contains PREACTIVATE_FEATURE" + assert oldNode.getIrreversibleBlockNum() <= libBeforePreactivation, \ + "4th node LIB should stuck on LIB before PREACTIVATE_FEATURE is activated" + + # Restart old node with newest version + # Before we are migrating to new version, use --export-reversible-blocks as the old version + # and --import-reversible-blocks with the new version to ensure the compatibility of the reversible blocks + # Finally, when we restart the 4th node with the version of nodeos that supports protocol feature, + # all nodes should be in sync, and the 4th node will also contain PREACTIVATE_FEATURE + portableRevBlkPath = os.path.join(Utils.getNodeDataDir(oldNodeId), "rev_blk_portable_format") + oldNode.kill(signal.SIGTERM) + # Note, for the following relaunch, these will fail to relaunch immediately (expected behavior of export/import), so the chainArg will not replace the old cmd + oldNode.relaunch(chainArg="--export-reversible-blocks {}".format(portableRevBlkPath), timeout=1) + oldNode.relaunch(chainArg="--import-reversible-blocks {}".format(portableRevBlkPath), timeout=1, nodeosPath="programs/nodeos/nodeos") + os.remove(portableRevBlkPath) + + restartNode(oldNode, chainArg="--replay", nodeosPath="programs/nodeos/nodeos") + time.sleep(2) # Give some time to replay + + assert areNodesInSync(allNodes), "All nodes should be in sync" + assert shouldNodeContainPreactivateFeature(oldNode), "4th node should contain PREACTIVATE_FEATURE" + + testSuccessful = True +finally: + TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails) + +Utils.Print("### END multiversion test ###") + +exitCode = 0 if testSuccessful else 1 +exit(exitCode) From 9247a72757c5a47e03b926db9cd381f627bf902d Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Fri, 12 Feb 2021 12:18:42 -0500 Subject: [PATCH 06/11] change the sequence of checking and resume in areNodesInSync --- ..._multiple_version_protocol_feature_test.py | 84 +++++++------------ 1 file changed, 30 insertions(+), 54 deletions(-) diff --git a/tests/nodeos_multiple_version_protocol_feature_test.py b/tests/nodeos_multiple_version_protocol_feature_test.py index 7b639ef8e4e..83c7d8cc0ef 100755 --- a/tests/nodeos_multiple_version_protocol_feature_test.py +++ b/tests/nodeos_multiple_version_protocol_feature_test.py @@ -13,7 +13,6 @@ from os.path import join, exists from datetime import datetime -Utils.Print("### BEGIN multiversion test ###") ############################################################### # nodeos_multiple_version_protocol_feature_test # @@ -47,26 +46,20 @@ def restartNode(node: Node, chainArg=None, addSwapFlags=None, nodeosPath=None): def shouldNodeContainPreactivateFeature(node): preactivateFeatureDigest = node.getSupportedProtocolFeatureDict()["PREACTIVATE_FEATURE"]["feature_digest"] assert preactivateFeatureDigest, "preactivateFeatureDigest should not be empty" - Utils.Print("preactivateFeatureDigest: {}".format(preactivateFeatureDigest)) blockHeaderState = node.getLatestBlockHeaderState() assert blockHeaderState, "blockHeaderState should not be empty" activatedProtocolFeatures = blockHeaderState["activated_protocol_features"]["protocol_features"] - Utils.Print("activatedProtocolFeatures size: {}".format(len(activatedProtocolFeatures))) - for f in activatedProtocolFeatures: - Utils.Print("activatedProtocolFeature: {}".format(f)) return preactivateFeatureDigest in 
activatedProtocolFeatures -waitUntilBeginningOfProdTurn_head = 0 +beginningOfProdTurn_head = 0 def waitUntilBeginningOfProdTurn(node, producerName, timeout=30, sleepTime=0.4): def isDesiredProdTurn(): - #headBlockNum = node.getHeadBlockNum() - waitUntilBeginningOfProdTurn_head = node.getHeadBlockNum() - res = node.getBlock(waitUntilBeginningOfProdTurn_head)["producer"] == producerName and \ - node.getBlock(waitUntilBeginningOfProdTurn_head-1)["producer"] != producerName + beginningOfProdTurn_head = node.getHeadBlockNum() + res = node.getBlock(beginningOfProdTurn_head)["producer"] == producerName and \ + node.getBlock(beginningOfProdTurn_head-1)["producer"] != producerName return res - #Utils.waitForTruth(isDesiredProdTurn, timeout, sleepTime) ret = Utils.waitForTruth(isDesiredProdTurn, timeout, sleepTime) - assert ret != None, "Expected producer to arrive within 19 seconds (3 other producers)" + assert ret != None, "Expected producer to arrive within 19 seconds (with 3 other producers)" def waitForOneRound(): time.sleep(24) # We have 4 producers for this test @@ -124,53 +117,40 @@ def hasBlockBecomeIrr(): def pauseBlockProductions(): for node in allNodes: - if not node.killed: - Utils.Print("** before node pause, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) - node.processCurlCmd("producer", "pause", "") - Utils.Print("** after node pause, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + if not node.killed: node.processCurlCmd("producer", "pause", "") def resumeBlockProductions(): for node in allNodes: - if not node.killed: - Utils.Print("** before node resume, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) - node.processCurlCmd("producer", "resume", "") - Utils.Print("** after node resume, hbi {}, head# {} **".format(node.getInfo()["head_block_id"], node.getHeadBlockNum())) + if not node.killed: node.processCurlCmd("producer", "resume", "") def areNodesInSync(nodes:[Node], pauseAll=True, resumeAll=True): - Utils.Print("*** CHECK areNodesInSync") # Pause all block production to ensure the head is not moving if pauseAll: pauseBlockProductions() time.sleep(2) # Wait for some time to ensure all blocks are propagated - - # Get all current head block IDs for each producer - #headBlockIds = [] - headBlockNums = [] + # Get current head block number for each producer + headBlockNums = set() for node in nodes: - #headBlockId = node.getInfo()["head_block_id"] - #headBlockIds.append(headBlockId) - #headBlockDict[node.nodeId] = node.getInfo()["head_block_num"] - headBlockNums.append(node.getInfo()["head_block_num"]) - - # for hbi in headBlockIds: - # Utils.Print("* headbBockId: {} *".format(hbi)) - for hbn in headBlockNums: - Utils.Print("* headbBockNum: {} *".format(hbn)) - - if resumeAll: - resumeBlockProductions() - - # Wait 1 second, then check if all nodes have previous head blocks by other producers - if len(set(headBlockNums)) != 1: + hbn = node.getInfo()["head_block_num"] + headBlockNums.add(hbn) + Utils.Print("node {}, hbn: {}".format(node.nodeId, hbn)) + inSync = True + if len(headBlockNums) != 1: + def nodeHasBlocks(node, blockNums): + for bn in blockNums: + if node.getBlock(bn) is None: + Utils.Print("node {} cannot get block {}".format(node.nodeId, bn)) + return False + return True + # Wait 1 second, then check if all nodes have previously saved head blocks of other producers time.sleep(1) for node in nodes: - for hbn in set(headBlockNums): - if not node.getBlock(hbn): - 
Utils.Print("node {} should contain block {}".format(node.nodeId, hbn)) - return False - - #return len(set(headBlockIds)) == 1 - return True + if not nodeHasBlocks(node, headBlockNums): + inSync = False + break + if resumeAll: + resumeBlockProductions() + return inSync Utils.Print("+++ Nodes are in sync before preactivation +++") # Before everything starts, all nodes (new version and old version) should be in sync @@ -184,26 +164,24 @@ def areNodesInSync(nodes:[Node], pauseAll=True, resumeAll=True): # Therefore, 1st node will be out of sync with 2nd, 3rd, and 4th node # After a round has passed though, 1st node will realize he's in minority fork and then join the other nodes # Hence, the PREACTIVATE_FEATURE that was previously activated will be dropped and all of the nodes should be in sync - Utils.Print("+++ 1st Node should contain PREACTIVATE FEATURE +++") setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", False) setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", False) waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") - #newNodes[0].activatePreactivateFeature() + # Retry activatePreactivateFeature for the 1st node after it enters production window for i in range(3): newNodes[0].activatePreactivateFeature() if shouldNodeContainPreactivateFeature(newNodes[0]): break - diff =newNodes[0].getInfo()["head_block_num"] - waitUntilBeginningOfProdTurn_head + diff = newNodes[0].getInfo()["head_block_num"] - beginningOfProdTurn_head assert diff >= 12, "1st node should contain PREACTIVATE FEATURE since we set it during its production window" assert shouldNodeContainPreactivateFeature(newNodes[0]), "1st node should contain PREACTIVATE FEATURE" assert not (shouldNodeContainPreactivateFeature(newNodes[1]) or shouldNodeContainPreactivateFeature(newNodes[2])), \ "2nd and 3rd node should not contain PREACTIVATE FEATURE" - Utils.Print("+++ 2nd, 3rd and 4th node should be in sync +++") + Utils.Print("+++ 2nd, 3rd and 4th node should be in sync, and 1st node should be out of sync +++") assert areNodesInSync([newNodes[1], newNodes[2], oldNode], resumeAll=False), "2nd, 3rd and 4th node should be in sync" - Utils.Print("+++ 1st node should be out of sync with the rest nodes +++") assert not areNodesInSync(allNodes, pauseAll=False), "+++ 1st node should be out of sync with the rest nodes +++" waitForOneRound() @@ -258,7 +236,5 @@ def areNodesInSync(nodes:[Node], pauseAll=True, resumeAll=True): finally: TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails) -Utils.Print("### END multiversion test ###") - exitCode = 0 if testSuccessful else 1 exit(exitCode) From 9ce99cc773ecaafc92f7df980a26dfc8e907733d Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Fri, 12 Feb 2021 14:42:25 -0500 Subject: [PATCH 07/11] fix areNodesInSync checking --- ..._multiple_version_protocol_feature_test.py | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/tests/nodeos_multiple_version_protocol_feature_test.py b/tests/nodeos_multiple_version_protocol_feature_test.py index 83c7d8cc0ef..c642e2d8636 100755 --- a/tests/nodeos_multiple_version_protocol_feature_test.py +++ b/tests/nodeos_multiple_version_protocol_feature_test.py @@ -128,26 +128,30 @@ def areNodesInSync(nodes:[Node], pauseAll=True, resumeAll=True): if pauseAll: pauseBlockProductions() time.sleep(2) # Wait for some time to ensure all blocks are propagated - # Get current head block number for each producer - headBlockNums = set() 
+ + # Get current head block number and IDs for each producer + headBlockNums = [] + headBlockIds = [] for node in nodes: - hbn = node.getInfo()["head_block_num"] - headBlockNums.add(hbn) - Utils.Print("node {}, hbn: {}".format(node.nodeId, hbn)) + hb = node.getInfo() + headBlockNums.append(hb["head_block_num"]) + headBlockIds.append(hb["head_block_id"]) + Utils.Print("node {}, head block id: {}, num: {}".format(node.nodeId, hb["head_block_id"], hb["head_block_num"])) + assert len(set(headBlockNums)) == len(set(headBlockIds)), "Different block IDs have the same block numbers, thus nodes are not in sync" + def nodeHasBlocks(node, blockIds, blockNums): + for blkNum, blkId in zip(blockNums, blockIds): + assert node.waitForBlock(blkNum, timeout=3) != None, "Expected to find block {}, but only reached {}".format(blkNum, node.getInfo()["head_block_num"]) + if node.getBlock(blkNum) is None: + Utils.Print("node {} cannot get block Id: {} (num: {})".format(node.nodeId, blkId, blkNum)) + return False + return True + # Check if each node has head blocks from other producers inSync = True - if len(headBlockNums) != 1: - def nodeHasBlocks(node, blockNums): - for bn in blockNums: - if node.getBlock(bn) is None: - Utils.Print("node {} cannot get block {}".format(node.nodeId, bn)) - return False - return True - # Wait 1 second, then check if all nodes have previously saved head blocks of other producers - time.sleep(1) - for node in nodes: - if not nodeHasBlocks(node, headBlockNums): - inSync = False - break + for node in nodes: + if not nodeHasBlocks(node, headBlockIds, headBlockNums): + inSync = False + break + if resumeAll: resumeBlockProductions() return inSync @@ -181,8 +185,8 @@ def nodeHasBlocks(node, blockNums): assert not (shouldNodeContainPreactivateFeature(newNodes[1]) or shouldNodeContainPreactivateFeature(newNodes[2])), \ "2nd and 3rd node should not contain PREACTIVATE FEATURE" Utils.Print("+++ 2nd, 3rd and 4th node should be in sync, and 1st node should be out of sync +++") - assert areNodesInSync([newNodes[1], newNodes[2], oldNode], resumeAll=False), "2nd, 3rd and 4th node should be in sync" - assert not areNodesInSync(allNodes, pauseAll=False), "+++ 1st node should be out of sync with the rest nodes +++" + assert areNodesInSync([newNodes[1], newNodes[2], oldNode], pauseAll=True, resumeAll=False), "2nd, 3rd and 4th node should be in sync" + assert not areNodesInSync(allNodes, pauseAll=False, resumeAll=True), "+++ 1st node should be out of sync with the rest nodes +++" waitForOneRound() From fd5e61931ecd41d7c7fa914fc8d82dbba4f0ef66 Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Fri, 12 Feb 2021 15:57:03 -0500 Subject: [PATCH 08/11] only call nodeHasBlocks when headBlockNums length is not 1 --- ..._multiple_version_protocol_feature_test.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/nodeos_multiple_version_protocol_feature_test.py b/tests/nodeos_multiple_version_protocol_feature_test.py index c642e2d8636..85dfe02a7d5 100755 --- a/tests/nodeos_multiple_version_protocol_feature_test.py +++ b/tests/nodeos_multiple_version_protocol_feature_test.py @@ -138,19 +138,20 @@ def areNodesInSync(nodes:[Node], pauseAll=True, resumeAll=True): headBlockIds.append(hb["head_block_id"]) Utils.Print("node {}, head block id: {}, num: {}".format(node.nodeId, hb["head_block_id"], hb["head_block_num"])) assert len(set(headBlockNums)) == len(set(headBlockIds)), "Different block IDs have the same block numbers, thus nodes are not in sync" - def nodeHasBlocks(node, 
blockIds, blockNums): - for blkNum, blkId in zip(blockNums, blockIds): - assert node.waitForBlock(blkNum, timeout=3) != None, "Expected to find block {}, but only reached {}".format(blkNum, node.getInfo()["head_block_num"]) - if node.getBlock(blkNum) is None: - Utils.Print("node {} cannot get block Id: {} (num: {})".format(node.nodeId, blkId, blkNum)) - return False - return True # Check if each node has head blocks from other producers inSync = True - for node in nodes: - if not nodeHasBlocks(node, headBlockIds, headBlockNums): - inSync = False - break + if len(set(headBlockNums)) != 1: + def nodeHasBlocks(node, blockIds, blockNums): + for blkNum, blkId in zip(blockNums, blockIds): + assert node.waitForBlock(blkNum, timeout=3) != None, "Expected to find block {}, but only reached {}".format(blkNum, node.getInfo()["head_block_num"]) + if node.getBlock(blkNum) is None: + Utils.Print("node {} cannot get block Id: {} (num: {})".format(node.nodeId, blkId, blkNum)) + return False + return True + for node in nodes: + if not nodeHasBlocks(node, headBlockIds, headBlockNums): + inSync = False + break if resumeAll: resumeBlockProductions() @@ -205,8 +206,8 @@ def nodeHasBlocks(node, blockIds, blockNums): libBeforePreactivation = newNodes[0].getIrreversibleBlockNum() newNodes[0].activatePreactivateFeature() - assert areNodesInSync(newNodes), "New nodes should be in sync" - assert not areNodesInSync(allNodes), "Nodes should not be in sync after preactivation" + assert areNodesInSync(newNodes, pauseAll=True, resumeAll=False), "New nodes should be in sync" + assert not areNodesInSync(allNodes, pauseAll=False, resumeAll=True), "Nodes should not be in sync after preactivation" for node in newNodes: assert shouldNodeContainPreactivateFeature(node), "New node should contain PREACTIVATE_FEATURE" activatedBlockNum = newNodes[0].getHeadBlockNum() # The PREACTIVATE_FEATURE should have been activated before or at this block num From 0819ebaaf53fe252487f00a1f2d4b641640656a5 Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Tue, 16 Feb 2021 08:45:24 -0500 Subject: [PATCH 09/11] fix areNodesInSync, 1st node feature checking, and formatting --- tests/Node.py | 12 +++-- ..._multiple_version_protocol_feature_test.py | 51 ++++++++++--------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/tests/Node.py b/tests/Node.py index 8927ae289bd..0688f064818 100644 --- a/tests/Node.py +++ b/tests/Node.py @@ -221,12 +221,16 @@ def validateAccounts(self, accounts): raise # pylint: disable=too-many-branches - def getBlock(self, blockNum, silentErrors=False, exitOnError=False): + def getBlock(self, blockNumOrId, silentErrors=False, exitOnError=False): """Given a blockId will return block details.""" - assert(isinstance(blockNum, int)) + assert(isinstance(blockNumOrId, int) or isinstance(blockNumOrId, str)) cmdDesc="get block" - cmd="%s %d" % (cmdDesc, blockNum) - msg="(block number=%s)" % (blockNum); + if isinstance(blockNumOrId, int): + cmd="%s %d" % (cmdDesc, blockNumOrId) + msg="(block number=%s)" % (blockNumOrId) + else: + cmd="%s %s" % (cmdDesc, blockNumOrId) + msg="(block id=%s)" % (blockNumOrId) return self.processCleosCmd(cmd, cmdDesc, silentErrors=silentErrors, exitOnError=exitOnError, exitMsg=msg) def isBlockPresent(self, blockNum, blockType=BlockType.head): diff --git a/tests/nodeos_multiple_version_protocol_feature_test.py b/tests/nodeos_multiple_version_protocol_feature_test.py index 85dfe02a7d5..331e665df8a 100755 --- a/tests/nodeos_multiple_version_protocol_feature_test.py +++ 
b/tests/nodeos_multiple_version_protocol_feature_test.py @@ -51,12 +51,12 @@ def shouldNodeContainPreactivateFeature(node): activatedProtocolFeatures = blockHeaderState["activated_protocol_features"]["protocol_features"] return preactivateFeatureDigest in activatedProtocolFeatures -beginningOfProdTurn_head = 0 +beginningOfProdTurnHead = 0 def waitUntilBeginningOfProdTurn(node, producerName, timeout=30, sleepTime=0.4): def isDesiredProdTurn(): - beginningOfProdTurn_head = node.getHeadBlockNum() - res = node.getBlock(beginningOfProdTurn_head)["producer"] == producerName and \ - node.getBlock(beginningOfProdTurn_head-1)["producer"] != producerName + beginningOfProdTurnHead = node.getHeadBlockNum() + res = node.getBlock(beginningOfProdTurnHead)["producer"] == producerName and \ + node.getBlock(beginningOfProdTurnHead-1)["producer"] != producerName return res ret = Utils.waitForTruth(isDesiredProdTurn, timeout, sleepTime) assert ret != None, "Expected producer to arrive within 19 seconds (with 3 other producers)" @@ -97,17 +97,17 @@ def hasBlockBecomeIrr(): # version 1.7 did not provide a default value for "--last-block-time-offset-us" so this is needed to # avoid dropping late blocks assert cluster.launch(pnodes=4, totalNodes=4, prodCount=1, totalProducers=4, - extraNodeosArgs=" --plugin eosio::producer_api_plugin ", - useBiosBootFile=False, - specificExtraNodeosArgs={ - 0:"--http-max-response-time-ms 990000", - 1:"--http-max-response-time-ms 990000", - 2:"--http-max-response-time-ms 990000", - 3:"--last-block-time-offset-us -200000"}, - onlySetProds=True, - pfSetupPolicy=PFSetupPolicy.NONE, - alternateVersionLabelsFile=alternateVersionLabelsFile, - associatedNodeLabels=associatedNodeLabels), "Unable to launch cluster" + extraNodeosArgs=" --plugin eosio::producer_api_plugin ", + useBiosBootFile=False, + specificExtraNodeosArgs={ + 0:"--http-max-response-time-ms 990000", + 1:"--http-max-response-time-ms 990000", + 2:"--http-max-response-time-ms 990000", + 3:"--last-block-time-offset-us -200000"}, + onlySetProds=True, + pfSetupPolicy=PFSetupPolicy.NONE, + alternateVersionLabelsFile=alternateVersionLabelsFile, + associatedNodeLabels=associatedNodeLabels), "Unable to launch cluster" newNodeIds = [0, 1, 2] oldNodeId = 3 @@ -142,10 +142,10 @@ def areNodesInSync(nodes:[Node], pauseAll=True, resumeAll=True): inSync = True if len(set(headBlockNums)) != 1: def nodeHasBlocks(node, blockIds, blockNums): - for blkNum, blkId in zip(blockNums, blockIds): + for blkId, blkNum in zip(blockIds, blockNums): assert node.waitForBlock(blkNum, timeout=3) != None, "Expected to find block {}, but only reached {}".format(blkNum, node.getInfo()["head_block_num"]) - if node.getBlock(blkNum) is None: - Utils.Print("node {} cannot get block Id: {} (num: {})".format(node.nodeId, blkId, blkNum)) + if node.getBlock(blkId) is None: + Utils.Print("node {} does not get block Id: {} (num: {})".format(node.nodeId, blkId, blkNum)) return False return True for node in nodes: @@ -173,13 +173,14 @@ def nodeHasBlocks(node, blockIds, blockNums): setValidityOfActTimeSubjRestriction(newNodes[1], "PREACTIVATE_FEATURE", False) setValidityOfActTimeSubjRestriction(newNodes[2], "PREACTIVATE_FEATURE", False) - waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") - # Retry activatePreactivateFeature for the 1st node after it enters production window for i in range(3): + Utils.Print("1st node tries activatePreactivateFeature time(s): {}".format(i+1)) + # 1st node waits for the start of the production turn each time it tries 
activatePreactivateFeature() + waitUntilBeginningOfProdTurn(newNodes[0], "defproducera") newNodes[0].activatePreactivateFeature() if shouldNodeContainPreactivateFeature(newNodes[0]): break - diff = newNodes[0].getInfo()["head_block_num"] - beginningOfProdTurn_head + diff = newNodes[0].getInfo()["head_block_num"] - beginningOfProdTurnHead assert diff >= 12, "1st node should contain PREACTIVATE FEATURE since we set it during its production window" assert shouldNodeContainPreactivateFeature(newNodes[0]), "1st node should contain PREACTIVATE FEATURE" @@ -212,12 +213,12 @@ def nodeHasBlocks(node, blockIds, blockNums): activatedBlockNum = newNodes[0].getHeadBlockNum() # The PREACTIVATE_FEATURE should have been activated before or at this block num assert waitUntilBlockBecomeIrr(newNodes[0], activatedBlockNum), \ - "1st node LIB should be able to advance past the block that contains PREACTIVATE_FEATURE" + "1st node LIB should be able to advance past the block that contains PREACTIVATE_FEATURE" assert newNodes[1].getIrreversibleBlockNum() >= activatedBlockNum and \ - newNodes[2].getIrreversibleBlockNum() >= activatedBlockNum, \ - "2nd and 3rd node LIB should also be able to advance past the block that contains PREACTIVATE_FEATURE" + newNodes[2].getIrreversibleBlockNum() >= activatedBlockNum, \ + "2nd and 3rd node LIB should also be able to advance past the block that contains PREACTIVATE_FEATURE" assert oldNode.getIrreversibleBlockNum() <= libBeforePreactivation, \ - "4th node LIB should stuck on LIB before PREACTIVATE_FEATURE is activated" + "4th node LIB should be stuck on LIB before PREACTIVATE_FEATURE is activated" # Restart old node with newest version # Before we are migrating to new version, use --export-reversible-blocks as the old version From 9b4378c07438942e1d535b1f9dfa899a55bf7a1f Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Tue, 16 Feb 2021 13:28:06 -0500 Subject: [PATCH 10/11] fix typo --- tests/nodeos_multiple_version_protocol_feature_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nodeos_multiple_version_protocol_feature_test.py b/tests/nodeos_multiple_version_protocol_feature_test.py index 331e665df8a..384be146fb9 100755 --- a/tests/nodeos_multiple_version_protocol_feature_test.py +++ b/tests/nodeos_multiple_version_protocol_feature_test.py @@ -145,7 +145,7 @@ def nodeHasBlocks(node, blockIds, blockNums): for blkId, blkNum in zip(blockIds, blockNums): assert node.waitForBlock(blkNum, timeout=3) != None, "Expected to find block {}, but only reached {}".format(blkNum, node.getInfo()["head_block_num"]) if node.getBlock(blkId) is None: - Utils.Print("node {} does not get block Id: {} (num: {})".format(node.nodeId, blkId, blkNum)) + Utils.Print("node {} does not have block Id: {} (num: {})".format(node.nodeId, blkId, blkNum)) return False return True for node in nodes: From c960fdcf9c9d647d777720a976bcb5c0324d4237 Mon Sep 17 00:00:00 2001 From: Qing Yang Date: Tue, 16 Feb 2021 19:08:24 -0500 Subject: [PATCH 11/11] clean up getBlock implementation --- tests/Node.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/Node.py b/tests/Node.py index 0688f064818..cba8668d55e 100644 --- a/tests/Node.py +++ b/tests/Node.py @@ -225,12 +225,9 @@ def getBlock(self, blockNumOrId, silentErrors=False, exitOnError=False): """Given a blockId will return block details.""" assert(isinstance(blockNumOrId, int) or isinstance(blockNumOrId, str)) cmdDesc="get block" - if isinstance(blockNumOrId, int): - cmd="%s %d" % (cmdDesc, blockNumOrId) - 
msg="(block number=%s)" % (blockNumOrId) - else: - cmd="%s %s" % (cmdDesc, blockNumOrId) - msg="(block id=%s)" % (blockNumOrId) + numOrId="number" if isinstance(blockNumOrId, int) else "id" + cmd="%s %s" % (cmdDesc, blockNumOrId) + msg="(block %s=%s)" % (numOrId, blockNumOrId) return self.processCleosCmd(cmd, cmdDesc, silentErrors=silentErrors, exitOnError=exitOnError, exitMsg=msg) def isBlockPresent(self, blockNum, blockType=BlockType.head):