From 9df629c7ceb0d3b034766320fd0f09ceb885b6b6 Mon Sep 17 00:00:00 2001 From: Jarred Parr Date: Tue, 24 Sep 2024 20:39:12 -0600 Subject: [PATCH] [CX_HARDENING] Evaluate and Remove Issue Links in HotShot (#3701) --- crates/hotshot/src/lib.rs | 2 - .../src/traits/networking/libp2p_network.rs | 1 - crates/libp2p-networking/README.md | 2 +- crates/libp2p-networking/src/network/node.rs | 4 - crates/task-impls/src/consensus/handlers.rs | 90 ++++++------------- crates/task-impls/src/consensus/mod.rs | 5 +- crates/task-impls/src/helpers.rs | 6 +- crates/task-impls/src/network.rs | 7 +- crates/task-impls/src/view_sync.rs | 3 - crates/testing/src/helpers.rs | 8 +- crates/testing/src/test_builder.rs | 4 - .../tests/tests_1/test_with_failures_2.rs | 2 - .../tests/tests_2/test_with_failures_one.rs | 2 - .../tests_3/test_with_failures_half_f.rs | 2 - .../tests/tests_4/test_with_failures_f.rs | 2 - crates/testing/tests/tests_5/timeout.rs | 6 -- crates/types/src/data.rs | 42 --------- crates/types/src/error.rs | 2 - crates/types/src/message.rs | 1 - crates/types/src/simple_certificate.rs | 21 ----- crates/types/src/vid.rs | 10 ++- 21 files changed, 46 insertions(+), 176 deletions(-) diff --git a/crates/hotshot/src/lib.rs b/crates/hotshot/src/lib.rs index 2fd7cde872..4c5e40a0b5 100644 --- a/crates/hotshot/src/lib.rs +++ b/crates/hotshot/src/lib.rs @@ -436,8 +436,6 @@ impl, V: Versions> SystemContext) { debug!(?event, "send_external_event"); broadcast_event(event, &self.external_event_stream.0).await; diff --git a/crates/hotshot/src/traits/networking/libp2p_network.rs b/crates/hotshot/src/traits/networking/libp2p_network.rs index 39f5965751..fbb5f32c67 100644 --- a/crates/hotshot/src/traits/networking/libp2p_network.rs +++ b/crates/hotshot/src/traits/networking/libp2p_network.rs @@ -552,7 +552,6 @@ impl Libp2pNetwork { bootstrap_addrs, is_ready: Arc::new(AtomicBool::new(false)), // This is optimal for 10-30 nodes. TODO: parameterize this for both tests and examples - // https://github.com/EspressoSystems/HotShot/issues/2088 dht_timeout: config.dht_timeout.unwrap_or(Duration::from_secs(120)), is_bootstrapped: Arc::new(AtomicBool::new(false)), metrics, diff --git a/crates/libp2p-networking/README.md b/crates/libp2p-networking/README.md index b6f7b86bff..62a190398b 100644 --- a/crates/libp2p-networking/README.md +++ b/crates/libp2p-networking/README.md @@ -45,7 +45,7 @@ spawns off five integration tests. - One that intersperses both broadcast and increments. - Two that publishes entries to the DHT and checks that other nodes can access these entries. -This can fail on MacOS (and linux) due to ["too many open files."](https://github.com/EspressoSystems/hotshot-networking-demo/issues/18) The fix is: +This can fail on MacOS (and linux) due to "too many open files." The fix is: ```bash ulimit -n 4096 diff --git a/crates/libp2p-networking/src/network/node.rs b/crates/libp2p-networking/src/network/node.rs index c90d1f1fc3..47e6e6d5e3 100644 --- a/crates/libp2p-networking/src/network/node.rs +++ b/crates/libp2p-networking/src/network/node.rs @@ -222,10 +222,6 @@ impl NetworkNode { // - Build a gossipsub network behavior let gossipsub: Gossipsub = Gossipsub::new( - // TODO do we even need this? - // - // if messages are signed at the the consensus level AND the network - // level (noise), this feels redundant. MessageAuthenticity::Signed(keypair.clone()), gossipsub_config, ) diff --git a/crates/task-impls/src/consensus/handlers.rs b/crates/task-impls/src/consensus/handlers.rs index 9ef7f69702..a2c4e6170c 100644 --- a/crates/task-impls/src/consensus/handlers.rs +++ b/crates/task-impls/src/consensus/handlers.rs @@ -218,14 +218,16 @@ pub async fn publish_proposal_from_commitment_and_metadata July, 2024 and this is still here, something has gone horribly wrong. - let cnm = commitment_and_metadata - .clone() - .context("Cannot propose because we don't have the VID payload commitment and metadata")?; + ensure!( + commitment_and_metadata.is_some(), + "Cannot propose because we don't have the VID payload commitment and metadata" + ); + + // This is a safe unwrap due to the prior ensure call. + let commitment_and_metadata = commitment_and_metadata.unwrap(); ensure!( - cnm.block_view == view, + commitment_and_metadata.block_view == view, "Cannot propose because our VID payload commitment and metadata is for an older view." ); @@ -236,7 +238,7 @@ pub async fn publish_proposal_from_commitment_and_metadata( - view: TYPES::Time, - sender: Sender>>, - receiver: Receiver>>, - quorum_membership: Arc, - public_key: TYPES::SignatureKey, - private_key: ::PrivateKey, - consensus: OuterConsensus, - delay: u64, - formed_upgrade_certificate: Option>, - upgrade_lock: UpgradeLock, - commitment_and_metadata: Option>, - proposal_cert: Option>, - instance_state: Arc, - id: u64, -) -> Result> { - publish_proposal_from_commitment_and_metadata( - view, - sender, - receiver, - quorum_membership, - public_key, - private_key, - consensus, - delay, - formed_upgrade_certificate, - upgrade_lock, - commitment_and_metadata, - proposal_cert, - instance_state, - id, - ) - .await -} - /// Handle the received quorum proposal. /// /// Returns the proposal that should be used to set the `cur_proposal` for other tasks. @@ -490,23 +453,24 @@ pub(crate) async fn handle_quorum_proposal_recv< "Attempting to publish proposal after voting for liveness; now in view: {}", *new_view ); - let create_and_send_proposal_handle = publish_proposal_if_able( - qc.view_number + 1, - event_sender, - event_receiver, - Arc::clone(&task_state.quorum_membership), - task_state.public_key.clone(), - task_state.private_key.clone(), - OuterConsensus::new(Arc::clone(&task_state.consensus.inner_consensus)), - task_state.round_start_delay, - task_state.formed_upgrade_certificate.clone(), - task_state.upgrade_lock.clone(), - task_state.payload_commitment_and_metadata.clone(), - task_state.proposal_cert.clone(), - Arc::clone(&task_state.instance_state), - task_state.id, - ) - .await?; + let create_and_send_proposal_handle = + publish_proposal_from_commitment_and_metadata( + qc.view_number + 1, + event_sender, + event_receiver, + Arc::clone(&task_state.quorum_membership), + task_state.public_key.clone(), + task_state.private_key.clone(), + OuterConsensus::new(Arc::clone(&task_state.consensus.inner_consensus)), + task_state.round_start_delay, + task_state.formed_upgrade_certificate.clone(), + task_state.upgrade_lock.clone(), + task_state.payload_commitment_and_metadata.clone(), + task_state.proposal_cert.clone(), + Arc::clone(&task_state.instance_state), + task_state.id, + ) + .await?; task_state .spawned_tasks diff --git a/crates/task-impls/src/consensus/mod.rs b/crates/task-impls/src/consensus/mod.rs index c4b7291bc3..8770de2af4 100644 --- a/crates/task-impls/src/consensus/mod.rs +++ b/crates/task-impls/src/consensus/mod.rs @@ -14,6 +14,7 @@ use async_lock::RwLock; use async_std::task::JoinHandle; use async_trait::async_trait; use futures::future::join_all; +use handlers::publish_proposal_from_commitment_and_metadata; use hotshot_task::task::TaskState; use hotshot_types::{ consensus::{CommitmentAndMetadata, OuterConsensus}, @@ -38,7 +39,7 @@ use tracing::{debug, error, info, instrument, warn}; use crate::{ consensus::handlers::{ - handle_quorum_proposal_recv, handle_quorum_proposal_validated, publish_proposal_if_able, + handle_quorum_proposal_recv, handle_quorum_proposal_validated, update_state_and_vote_if_able, VoteInfo, }, events::HotShotEvent, @@ -191,7 +192,7 @@ impl, V: Versions> ConsensusTaskSt event_sender: Sender>>, event_receiver: Receiver>>, ) -> Result<()> { - let create_and_send_proposal_handle = publish_proposal_if_able( + let create_and_send_proposal_handle = publish_proposal_from_commitment_and_metadata( view, event_sender, event_receiver, diff --git a/crates/task-impls/src/helpers.rs b/crates/task-impls/src/helpers.rs index 67f78ac670..1c266029c9 100644 --- a/crates/task-impls/src/helpers.rs +++ b/crates/task-impls/src/helpers.rs @@ -39,7 +39,7 @@ use hotshot_types::{ }; #[cfg(async_executor_impl = "tokio")] use tokio::task::JoinHandle; -use tracing::{debug, error, info, instrument, warn}; +use tracing::{debug, info, instrument, warn}; use crate::{events::HotShotEvent, request::REQUEST_TIMEOUT}; @@ -699,9 +699,7 @@ pub(crate) async fn update_view( debug!("Updating view from {} to {}", *old_view, *new_view); if *old_view / 100 != *new_view / 100 { - // TODO (https://github.com/EspressoSystems/HotShot/issues/2296): - // switch to info! when INFO logs become less cluttered - error!("Progress: entered view {:>6}", *new_view); + info!("Progress: entered view {:>6}", *new_view); } *cur_view = new_view; diff --git a/crates/task-impls/src/network.rs b/crates/task-impls/src/network.rs index 40597b75b2..6894f0efb0 100644 --- a/crates/task-impls/src/network.rs +++ b/crates/task-impls/src/network.rs @@ -170,9 +170,6 @@ impl NetworkMessageTaskState { } }, }; - // TODO (Keyao benchmarking) Update these event variants (similar to the - // `TransactionsRecv` event) so we can send one event for a vector of messages. - // broadcast_event(Arc::new(event), &self.internal_event_stream).await; } @@ -298,7 +295,7 @@ impl< sender: sender.clone(), kind: MessageKind::::from_consensus_message(SequencingMessage::Da( DaConsensusMessage::VidDisperseMsg(proposal), - )), // TODO not a DaConsensusMessage https://github.com/EspressoSystems/HotShot/issues/1696 + )), }; let serialized_message = match self.upgrade_lock.serialize(&message).await { Ok(serialized) => serialized, @@ -363,7 +360,7 @@ impl< /// which will be used to create a message and transmit on the wire. /// Returns `None` if the parsing result should not be sent on the wire. /// Handles the `VidDisperseSend` event separately using a helper method. - #[allow(clippy::too_many_lines)] // TODO https://github.com/EspressoSystems/HotShot/issues/1704 + #[allow(clippy::too_many_lines)] async fn parse_event( &mut self, event: Arc>, diff --git a/crates/task-impls/src/view_sync.rs b/crates/task-impls/src/view_sync.rs index 134766283b..0b8452a2f1 100644 --- a/crates/task-impls/src/view_sync.rs +++ b/crates/task-impls/src/view_sync.rs @@ -300,7 +300,6 @@ impl, V: Versions> ViewSyncTaskSta // We do not have a relay task already running, so start one if self.membership.leader(vote_view + relay) != self.public_key { - // TODO ED This will occur because everyone is pulling down votes for now. Will be fixed in `https://github.com/EspressoSystems/HotShot/issues/1471` debug!("View sync vote sent to wrong leader"); return; } @@ -339,7 +338,6 @@ impl, V: Versions> ViewSyncTaskSta // We do not have a relay task already running, so start one if self.membership.leader(vote_view + relay) != self.public_key { - // TODO ED This will occur because everyone is pulling down votes for now. Will be fixed in `https://github.com/EspressoSystems/HotShot/issues/1471` debug!("View sync vote sent to wrong leader"); return; } @@ -378,7 +376,6 @@ impl, V: Versions> ViewSyncTaskSta // We do not have a relay task already running, so start one if self.membership.leader(vote_view + relay) != self.public_key { - // TODO ED This will occur because everyone is pulling down votes for now. Will be fixed in `https://github.com/EspressoSystems/HotShot/issues/1471` debug!("View sync vote sent to wrong leader"); return; } diff --git a/crates/testing/src/helpers.rs b/crates/testing/src/helpers.rs index a5a992784c..5c41e58380 100644 --- a/crates/testing/src/helpers.rs +++ b/crates/testing/src/helpers.rs @@ -38,7 +38,7 @@ use hotshot_types::{ node_implementation::{NodeType, Versions}, }, utils::{View, ViewInner}, - vid::{vid_scheme, VidCommitment, VidSchemeType}, + vid::{vid_scheme, VidCommitment, VidProposal, VidSchemeType}, vote::{Certificate, HasViewNumber, Vote}, }; use jf_vid::VidScheme; @@ -291,16 +291,12 @@ pub fn build_payload_commitment( } /// TODO: -#[allow(clippy::type_complexity)] pub fn build_vid_proposal( quorum_membership: &::Membership, view_number: TYPES::Time, transactions: Vec, private_key: &::PrivateKey, -) -> ( - Proposal>, - Vec>>, -) { +) -> VidProposal { let mut vid = vid_scheme_from_view_number::(quorum_membership, view_number); let encoded_transactions = TestTransaction::encode(&transactions); diff --git a/crates/testing/src/test_builder.rs b/crates/testing/src/test_builder.rs index df05b8414f..ea14c428be 100644 --- a/crates/testing/src/test_builder.rs +++ b/crates/testing/src/test_builder.rs @@ -335,8 +335,6 @@ impl, V: Versions> TestDescription pub fn default_multiple_rounds() -> Self { let num_nodes_with_stake = 10; TestDescription:: { - // TODO: remove once we have fixed the DHT timeout issue - // https://github.com/EspressoSystems/HotShot/issues/2088 num_bootstrap_nodes: num_nodes_with_stake, num_nodes_with_stake, start_nodes: num_nodes_with_stake, @@ -371,8 +369,6 @@ impl, V: Versions> TestDescription // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the // following issue. - // TODO: Update message broadcasting to avoid hanging - // da_staked_committee_size: 14, completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder( TimeBasedCompletionTaskDescription { diff --git a/crates/testing/tests/tests_1/test_with_failures_2.rs b/crates/testing/tests/tests_1/test_with_failures_2.rs index 8573af51f5..c922b7fd6e 100644 --- a/crates/testing/tests/tests_1/test_with_failures_2.rs +++ b/crates/testing/tests/tests_1/test_with_failures_2.rs @@ -46,8 +46,6 @@ cross_tests!( // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the // following issue. - // TODO: Update message broadcasting to avoid hanging - // let dead_nodes = vec![ ChangeNode { idx: 10, diff --git a/crates/testing/tests/tests_2/test_with_failures_one.rs b/crates/testing/tests/tests_2/test_with_failures_one.rs index c540c9cbfc..4fc96e5564 100644 --- a/crates/testing/tests/tests_2/test_with_failures_one.rs +++ b/crates/testing/tests/tests_2/test_with_failures_one.rs @@ -28,8 +28,6 @@ cross_tests!( // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the // following issue. - // TODO: Update message broadcasting to avoid hanging - // let dead_nodes = vec![ChangeNode { idx: 19, updown: NodeAction::Down, diff --git a/crates/testing/tests/tests_3/test_with_failures_half_f.rs b/crates/testing/tests/tests_3/test_with_failures_half_f.rs index 8b1eb531a2..797aa77cab 100644 --- a/crates/testing/tests/tests_3/test_with_failures_half_f.rs +++ b/crates/testing/tests/tests_3/test_with_failures_half_f.rs @@ -27,8 +27,6 @@ cross_tests!( // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the // following issue. - // TODO: Update message broadcasting to avoid hanging - // let dead_nodes = vec![ ChangeNode { idx: 17, diff --git a/crates/testing/tests/tests_4/test_with_failures_f.rs b/crates/testing/tests/tests_4/test_with_failures_f.rs index 931d7eaf5e..6ea256fc2d 100644 --- a/crates/testing/tests/tests_4/test_with_failures_f.rs +++ b/crates/testing/tests/tests_4/test_with_failures_f.rs @@ -30,8 +30,6 @@ cross_tests!( // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the // following issue. - // TODO: Update message broadcasting to avoid hanging - // let dead_nodes = vec![ ChangeNode { idx: 14, diff --git a/crates/testing/tests/tests_5/timeout.rs b/crates/testing/tests/tests_5/timeout.rs index 2269d5000d..791e9551bd 100644 --- a/crates/testing/tests/tests_5/timeout.rs +++ b/crates/testing/tests/tests_5/timeout.rs @@ -7,8 +7,6 @@ #[cfg(test)] #[cfg_attr(async_executor_impl = "tokio", tokio::test(flavor = "multi_thread"))] #[cfg_attr(async_executor_impl = "async-std", async_std::test)] -// TODO Add memory network tests after this issue is finished: -// https://github.com/EspressoSystems/HotShot/issues/1790 async fn test_timeout() { use std::time::Duration; @@ -56,8 +54,6 @@ async fn test_timeout() { }, ); - // TODO ED Test with memory network once issue is resolved - // https://github.com/EspressoSystems/HotShot/issues/1790 metadata .gen_launcher(0) .launch() @@ -120,8 +116,6 @@ async fn test_timeout_libp2p() { }, ); - // TODO ED Test with memory network once issue is resolved - // https://github.com/EspressoSystems/HotShot/issues/1790 metadata .gen_launcher(0) .launch() diff --git a/crates/types/src/data.rs b/crates/types/src/data.rs index ca8f746008..a51b512ae4 100644 --- a/crates/types/src/data.rs +++ b/crates/types/src/data.rs @@ -638,48 +638,6 @@ impl Leaf { self.block_header().payload_commitment() } - // TODO: Replace this function with `extends_upgrade` after the following issue is done: - // https://github.com/EspressoSystems/HotShot/issues/3357. - /// Validate that a leaf has the right upgrade certificate to be the immediate child of another leaf - /// - /// This may not be a complete function. Please double-check that it performs the checks you expect before subtituting validation logic with it. - /// - /// # Errors - /// Returns an error if the certificates are not identical, or that when we no longer see a - /// cert, it's for the right reason. - pub fn temp_extends_upgrade( - &self, - parent: &Self, - decided_upgrade_certificate: &Option>, - ) -> Result<()> { - match (self.upgrade_certificate(), parent.upgrade_certificate()) { - // Easiest cases are: - // - no upgrade certificate on either: this is the most common case, and is always fine. - // - if the parent didn't have a certificate, but we see one now, it just means that we have begun an upgrade: again, this is always fine. - (None | Some(_), None) => {} - // If we no longer see a cert, we have to make sure that we either: - // - no longer care because we have passed new_version_first_view, or - // - no longer care because we have passed `decide_by` without deciding the certificate. - (None, Some(parent_cert)) => { - ensure!(self.view_number() > parent_cert.data.new_version_first_view - || (self.view_number() > parent_cert.data.decide_by && decided_upgrade_certificate.is_none()), - "The new leaf is missing an upgrade certificate that was present in its parent, and should still be live." - ); - } - // If we both have a certificate, they should be identical. - // Technically, this prevents us from initiating a new upgrade in the view immediately following an upgrade. - // I think this is a fairly lax restriction. - (Some(cert), Some(parent_cert)) => { - ensure!(cert == parent_cert, "The new leaf does not extend the parent leaf, because it has attached a different upgrade certificate."); - } - } - - // This check should be added once we sort out the genesis leaf/justify_qc issue. - // ensure!(self.parent_commitment() == parent_leaf.commit(), "The commitment of the parent leaf does not match the specified parent commitment."); - - Ok(()) - } - /// Validate that a leaf has the right upgrade certificate to be the immediate child of another leaf /// /// This may not be a complete function. Please double-check that it performs the checks you expect before subtituting validation logic with it. diff --git a/crates/types/src/error.rs b/crates/types/src/error.rs index 5127f1d68c..a73b0e1a88 100644 --- a/crates/types/src/error.rs +++ b/crates/types/src/error.rs @@ -82,8 +82,6 @@ pub enum HotShotError { threshold: NonZeroU64, }, /// Miscellaneous error - /// TODO fix this with - /// #181 Misc { /// source of error context: String, diff --git a/crates/types/src/message.rs b/crates/types/src/message.rs index 303e1d7073..70a73cec5f 100644 --- a/crates/types/src/message.rs +++ b/crates/types/src/message.rs @@ -218,7 +218,6 @@ pub enum DaConsensusMessage { /// Initiate VID dispersal. /// /// Like [`DaProposal`]. Use `Msg` suffix to distinguish from `VidDisperse`. - /// TODO this variant should not be a [`DaConsensusMessage`] because VidDisperseMsg(Proposal>), } diff --git a/crates/types/src/simple_certificate.rs b/crates/types/src/simple_certificate.rs index 5d87393ea7..f948e3a20b 100644 --- a/crates/types/src/simple_certificate.rs +++ b/crates/types/src/simple_certificate.rs @@ -198,27 +198,6 @@ impl Display for QuorumCertificate { } impl UpgradeCertificate { - // TODO: Replace this function with `is_relevant` after the following issue is done: - // https://github.com/EspressoSystems/HotShot/issues/3357. - /// Determines whether or not a certificate is relevant (i.e. we still have time to reach a - /// decide) - /// - /// # Errors - /// Returns an error when the certificate is no longer relevant - pub fn temp_is_relevant( - &self, - view_number: TYPES::Time, - decided_upgrade_certificate: Option, - ) -> Result<()> { - ensure!( - self.data.decide_by >= view_number - || decided_upgrade_certificate.is_some_and(|cert| cert == *self), - "Upgrade certificate is no longer relevant." - ); - - Ok(()) - } - /// Determines whether or not a certificate is relevant (i.e. we still have time to reach a /// decide) /// diff --git a/crates/types/src/vid.rs b/crates/types/src/vid.rs index 1d8afe2091..07f5b2cb1d 100644 --- a/crates/types/src/vid.rs +++ b/crates/types/src/vid.rs @@ -34,7 +34,10 @@ use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; use sha2::Sha256; -use crate::constants::SRS_DEGREE; +use crate::{ + constants::SRS_DEGREE, data::VidDisperse as HotShotVidDisperse, data::VidDisperseShare, + message::Proposal, +}; /// VID scheme constructor. /// @@ -104,6 +107,11 @@ pub type VidCommon = ::Common; pub type VidShare = ::Share; /// VID PrecomputeData type pub type VidPrecomputeData = ::PrecomputeData; +/// VID proposal type +pub type VidProposal = ( + Proposal>, + Vec>>, +); #[cfg(not(feature = "gpu-vid"))] /// Internal Jellyfish VID scheme