diff --git a/Cargo.lock b/Cargo.lock index 03223ee5f..d72342620 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5767,6 +5767,8 @@ version = "0.1.0" dependencies = [ "alloy", "alloy-compat", + "alloy-primitives", + "alloy-serde", "anyhow", "async-stream", "axum", diff --git a/Cargo.toml b/Cargo.toml index 9cbc60fd4..7eca11df4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,8 @@ alloy = { version = '0.3.0', default-features = false, features = [ "transport-http", "rpc-types-debug", ] } +alloy-primitives = "0.8.0" +alloy-serde = "0.3.0" anyhow = "1.0.86" async-stream = "0.3.5" axum = "0.7.5" @@ -50,6 +52,7 @@ criterion = "0.5.1" dotenvy = "0.15.7" either = "1.12.0" enum-as-inner = "0.6.0" +enumn = "0.1.13" env_logger = "0.11.3" eth_trie = "0.4.0" ethereum-types = "0.14.1" @@ -86,6 +89,7 @@ serde = "1.0.203" serde-big-array = "0.5.1" serde_json = "1.0.118" serde_path_to_error = "0.1.16" +serde_with = "3.8.1" sha2 = "0.10.8" static_assertions = "1.1.0" thiserror = "1.0.61" @@ -93,7 +97,7 @@ tiny-keccak = "2.0.2" tokio = { version = "1.38.0", features = ["full"] } toml = "0.8.14" tower = "0.4" -tracing = "0.1" +tracing = { version = "0.1", features = ["attributes"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } u4 = "0.1.0" uint = "0.9.5" diff --git a/evm_arithmetization/benches/fibonacci_25m_gas.rs b/evm_arithmetization/benches/fibonacci_25m_gas.rs index 2242b3049..26ed8136e 100644 --- a/evm_arithmetization/benches/fibonacci_25m_gas.rs +++ b/evm_arithmetization/benches/fibonacci_25m_gas.rs @@ -192,6 +192,7 @@ fn prepare_setup() -> anyhow::Result> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }) } diff --git a/evm_arithmetization/src/cpu/kernel/interpreter.rs b/evm_arithmetization/src/cpu/kernel/interpreter.rs index 78e11206e..9ff445fa2 100644 --- a/evm_arithmetization/src/cpu/kernel/interpreter.rs +++ b/evm_arithmetization/src/cpu/kernel/interpreter.rs @@ -5,11 +5,13 @@ //! 
the future execution and generate nondeterministically the corresponding //! jumpdest table, before the actual CPU carries on with contract execution. +use core::option::Option::None; use std::collections::{BTreeSet, HashMap}; use anyhow::anyhow; use ethereum_types::{BigEndianHash, U256}; -use log::Level; +use keccak_hash::H256; +use log::{trace, Level}; use mpt_trie::partial_trie::PartialTrie; use plonky2::hash::hash_types::RichField; use serde::{Deserialize, Serialize}; @@ -19,8 +21,10 @@ use crate::cpu::columns::CpuColumnsView; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; use crate::generation::debug_inputs; +use crate::generation::jumpdest::{ContextJumpDests, JumpDestTableProcessed, JumpDestTableWitness}; use crate::generation::linked_list::LinkedListsPtrs; use crate::generation::mpt::{load_linked_lists_and_txn_and_receipt_mpts, TrieRootPtrs}; +use crate::generation::prover_input::get_proofs_and_jumpdests; use crate::generation::rlp::all_rlp_prover_inputs_reversed; use crate::generation::state::{ all_ger_prover_inputs, all_withdrawals_prover_inputs_reversed, GenerationState, @@ -57,6 +61,7 @@ pub(crate) struct Interpreter { /// Counts the number of appearances of each opcode. For debugging purposes. #[allow(unused)] pub(crate) opcode_count: [usize; 0x100], + /// A table of contexts and their reached JUMPDESTs. jumpdest_table: HashMap>, /// `true` if the we are currently carrying out a jumpdest analysis. 
pub(crate) is_jumpdest_analysis: bool, @@ -72,9 +77,9 @@ pub(crate) struct Interpreter { pub(crate) fn simulate_cpu_and_get_user_jumps( final_label: &str, state: &GenerationState, -) -> Option>> { +) -> Option<(JumpDestTableProcessed, JumpDestTableWitness)> { match state.jumpdest_table { - Some(_) => None, + Some(_) => Default::default(), None => { let halt_pc = KERNEL.global_labels[final_label]; let initial_context = state.registers.context; @@ -93,16 +98,17 @@ pub(crate) fn simulate_cpu_and_get_user_jumps( let clock = interpreter.get_clock(); - interpreter + let (a, jdtw) = interpreter .generation_state - .set_jumpdest_analysis_inputs(interpreter.jumpdest_table); + .get_jumpdest_analysis_inputs(interpreter.jumpdest_table.clone()); log::debug!( "Simulated CPU for jumpdest analysis halted after {:?} cycles.", clock ); - - interpreter.generation_state.jumpdest_table + // (interpreter.generation_state.jumpdest_table).map(|x| (x, jdtw)) + interpreter.generation_state.jumpdest_table = Some(a.clone()); + Some((a, jdtw)) } } } @@ -115,7 +121,7 @@ pub(crate) struct ExtraSegmentData { pub(crate) withdrawal_prover_inputs: Vec, pub(crate) ger_prover_inputs: Vec, pub(crate) trie_root_ptrs: TrieRootPtrs, - pub(crate) jumpdest_table: Option>>, + pub(crate) jumpdest_table: Option, pub(crate) access_lists_ptrs: LinkedListsPtrs, pub(crate) state_ptrs: LinkedListsPtrs, pub(crate) next_txn_index: usize, @@ -151,6 +157,60 @@ pub(crate) fn set_registers_and_run( interpreter.run() } +/// Computes the JUMPDEST proofs for each context. +/// +/// # Arguments +/// +/// - `jumpdest_table_rpc`: The raw table received from RPC. +/// - `code_map`: The corresponding database of contract code used in the trace.
+pub(crate) fn get_jumpdest_analysis_inputs_rpc( + jumpdest_table_rpc: &JumpDestTableWitness, + code_map: &HashMap>, +) -> JumpDestTableProcessed { + let ctx_proofs = (*jumpdest_table_rpc) + .iter() + .flat_map(|(code_addr, ctx_jumpdests)| { + let code = if code_map.contains_key(code_addr) { + &code_map[code_addr] + } else { + &vec![] + }; + trace!( + "code: {:?}, code_addr: {:?}, {:?} <============", + &code, + &code_addr, + code_map.contains_key(code_addr), + ); + trace!("code_map: {:?}", &code_map); + prove_context_jumpdests(code, ctx_jumpdests) + }) + .collect(); + JumpDestTableProcessed::new(ctx_proofs) +} + +/// Orchestrates the proving of all contexts in a specific bytecode. +/// +/// # Arguments +/// +/// - `ctx_jumpdests`: Map from `ctx` to its list of offsets to reached +/// `JUMPDEST`s. +/// - `code`: The bytecode for the contexts. This is the same for all contexts. +fn prove_context_jumpdests( + code: &[u8], + ctx_jumpdests: &ContextJumpDests, +) -> HashMap> { + ctx_jumpdests + .0 + .iter() + .map(|(&ctx, jumpdests)| { + let proofs = jumpdests.last().map_or(Vec::default(), |&largest_address| { + get_proofs_and_jumpdests(code, largest_address, jumpdests.clone()) + }); + (ctx, proofs) + }) + .collect() +} + impl Interpreter { /// Returns an instance of `Interpreter` given `GenerationInputs`, and /// assuming we are initializing with the `KERNEL` code. 
diff --git a/evm_arithmetization/src/cpu/kernel/tests/add11.rs b/evm_arithmetization/src/cpu/kernel/tests/add11.rs index 683987244..a71f2d67d 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/add11.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/add11.rs @@ -193,6 +193,7 @@ fn test_add11_yml() { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let initial_stack = vec![]; @@ -370,6 +371,7 @@ fn test_add11_yml_with_exception() { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let initial_stack = vec![]; diff --git a/evm_arithmetization/src/cpu/kernel/tests/core/jumpdest_analysis.rs b/evm_arithmetization/src/cpu/kernel/tests/core/jumpdest_analysis.rs index f2d00ede5..d1b076118 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/core/jumpdest_analysis.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/core/jumpdest_analysis.rs @@ -10,13 +10,15 @@ use plonky2::hash::hash_types::RichField; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::interpreter::Interpreter; use crate::cpu::kernel::opcodes::{get_opcode, get_push_opcode}; +use crate::generation::jumpdest::JumpDestTableProcessed; use crate::memory::segments::Segment; use crate::witness::memory::MemoryAddress; use crate::witness::operation::CONTEXT_SCALING_FACTOR; impl Interpreter { pub(crate) fn set_jumpdest_analysis_inputs(&mut self, jumps: HashMap>) { - self.generation_state.set_jumpdest_analysis_inputs(jumps); + let (jdtp, _jdtw) = self.generation_state.get_jumpdest_analysis_inputs(jumps); + self.generation_state.jumpdest_table = Some(jdtp); } pub(crate) fn get_jumpdest_bit(&self, offset: usize) -> U256 { @@ -106,7 +108,10 @@ fn test_jumpdest_analysis() -> Result<()> { interpreter.generation_state.jumpdest_table, // Context 3 has jumpdest 1, 5, 7. All have proof 0 and hence // the list [proof_0, jumpdest_0, ... 
] is [0, 1, 0, 5, 0, 7, 8, 40] - Some(HashMap::from([(3, vec![0, 1, 0, 5, 0, 7, 8, 40])])) + Some(JumpDestTableProcessed::new(HashMap::from([( + 3, + vec![0, 1, 0, 5, 0, 7, 8, 40] + )]))) ); // Run jumpdest analysis with context = 3 @@ -123,14 +128,14 @@ fn test_jumpdest_analysis() -> Result<()> { // We need to manually pop the jumpdest_table and push its value on the top of // the stack - interpreter + (*interpreter .generation_state .jumpdest_table .as_mut() - .unwrap() - .get_mut(&CONTEXT) - .unwrap() - .pop(); + .unwrap()) + .get_mut(&CONTEXT) + .unwrap() + .pop(); interpreter .push(41.into()) .expect("The stack should not overflow"); @@ -175,7 +180,9 @@ fn test_packed_verification() -> Result<()> { let mut interpreter: Interpreter = Interpreter::new(write_table_if_jumpdest, initial_stack.clone(), None); interpreter.set_code(CONTEXT, code.clone()); - interpreter.generation_state.jumpdest_table = Some(HashMap::from([(3, vec![1, 33])])); + interpreter.generation_state.jumpdest_table = Some(JumpDestTableProcessed::new(HashMap::from( + [(3, vec![1, 33])], + ))); interpreter.run()?; @@ -188,7 +195,9 @@ fn test_packed_verification() -> Result<()> { let mut interpreter: Interpreter = Interpreter::new(write_table_if_jumpdest, initial_stack.clone(), None); interpreter.set_code(CONTEXT, code.clone()); - interpreter.generation_state.jumpdest_table = Some(HashMap::from([(3, vec![1, 33])])); + interpreter.generation_state.jumpdest_table = Some(JumpDestTableProcessed::new( + HashMap::from([(3, vec![1, 33])]), + )); assert!(interpreter.run().is_err()); diff --git a/evm_arithmetization/src/cpu/kernel/tests/init_exc_stop.rs b/evm_arithmetization/src/cpu/kernel/tests/init_exc_stop.rs index 2dea58b55..d254a19b1 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/init_exc_stop.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/init_exc_stop.rs @@ -101,6 +101,7 @@ fn test_init_exc_stop() { cur_hash: H256::default(), }, ger_data: None, + jumpdest_table: Default::default(), }; let 
initial_stack = vec![]; let initial_offset = KERNEL.global_labels["init"]; diff --git a/evm_arithmetization/src/generation/jumpdest.rs b/evm_arithmetization/src/generation/jumpdest.rs new file mode 100644 index 000000000..f980d51ac --- /dev/null +++ b/evm_arithmetization/src/generation/jumpdest.rs @@ -0,0 +1,212 @@ +//! EVM opcode 0x5B or 91 is [`JUMPDEST`] which encodes a valid offset, that +//! opcodes `JUMP` and `JUMPI` can jump to. Jumps to non-[`JUMPDEST`] +//! instructions are invalid. During an execution a [`JUMPDEST`] may be visited +//! zero or more times. Offsets are measured in bytes with respect to the +//! beginning of some contract code, which is uniquely identified by its +//! `CodeHash`. Every time control flow switches to another contract through +//! a `CALL`-like instruction a new call context, `Context`, is created. Thus, +//! the triple (`CodeHash`,`Context`, `Offset`) uniquely identifies a visited +//! [`JUMPDEST`] offset of an execution. +//! +//! Since an operation like e.g. `PUSH 0x5B` does not encode a valid +//! [`JUMPDEST`] in its second byte, and `PUSH32 +//! 5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B` does not +//! encode valid [`JUMPDEST`]s in bytes 1-32, some diligence must be exercised +//! when proving validity of jump operations. +//! +//! This module concerns itself with data structures for collecting the +//! offsets of each [`JUMPDEST`] that was visited during an execution, without +//! recording duplicates. The proofs that each of these offsets is not rendered +//! invalid by any of the previous 32 bytes `PUSH1`-`PUSH32` are computed later +//! in [`prove_context_jumpdests`] on the basis of these collections. +//! +//!
[`JUMPDEST`]: https://www.evm.codes/?fork=cancun#5b + +use std::cmp::max; +use std::{ + collections::{BTreeSet, HashMap}, + fmt::Display, + ops::{Deref, DerefMut}, +}; + +use itertools::{sorted, Itertools}; +use keccak_hash::H256; +use serde::{Deserialize, Serialize}; + +/// Each `CodeHash` can be called one or more times, +/// each time creating a new `Context`. +/// Each `Context` will contain one or more offsets of `JUMPDEST`. +#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Default)] +pub struct ContextJumpDests(pub HashMap>); + +/// The result after proving a [`JumpDestTableWitness`]. +#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Default)] +pub(crate) struct JumpDestTableProcessed(HashMap>); + +/// Map `CodeHash -> (Context -> [JumpDests])` +#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Default)] +pub struct JumpDestTableWitness(HashMap); + +impl ContextJumpDests { + pub fn insert(&mut self, ctx: usize, offset: usize) { + self.entry(ctx).or_default().insert(offset); + } + + pub fn get(&self, ctx: usize) -> Option<&BTreeSet> { + self.0.get(&ctx) + } +} + +impl JumpDestTableProcessed { + pub fn new(ctx_map: HashMap>) -> Self { + Self(ctx_map) + } +} + +impl JumpDestTableWitness { + pub fn get(&self, code_hash: &H256) -> Option<&ContextJumpDests> { + self.0.get(code_hash) + } + + /// Insert `offset` into `ctx` under the corresponding `code_hash`. + /// Creates the required `ctx` keys and `code_hash`. Idempotent. 
+ pub fn insert(&mut self, code_hash: H256, ctx: usize, offset: usize) { + (*self).entry(code_hash).or_default().insert(ctx, offset); + } + + pub fn extend(mut self, other: &Self, prev_max_ctx: usize) -> (Self, usize) { + let mut curr_max_ctx = prev_max_ctx; + + for (code_hash, ctx_tbl) in (*other).iter() { + for (ctx, jumpdests) in ctx_tbl.0.iter() { + let batch_ctx = prev_max_ctx + ctx; + curr_max_ctx = max(curr_max_ctx, batch_ctx); + + for offset in jumpdests { + self.insert(*code_hash, batch_ctx, *offset); + } + } + } + + (self, curr_max_ctx) + } + + pub fn merge<'a>(jdts: impl IntoIterator) -> (Self, usize) { + jdts.into_iter() + .fold((Default::default(), 0), |(acc, cnt), t| acc.extend(t, cnt)) + } +} + +impl Display for JumpDestTableWitness { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "\n=== JumpDestTableWitness ===")?; + + for (code, ctxtbls) in &self.0 { + write!(f, "codehash: {:#x}\n{}", code, ctxtbls)?; + } + Ok(()) + } +} + +impl Display for ContextJumpDests { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let v: Vec<_> = self.0.iter().sorted().collect(); + for (ctx, offsets) in v.into_iter() { + write!(f, " ctx: {:>4}: [", ctx)?; + for offset in offsets { + write!(f, "{:#}, ", offset)?; + } + writeln!(f, "]")?; + } + Ok(()) + } +} + +impl Display for JumpDestTableProcessed { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "\n=== JumpDestTableProcessed ===")?; + + let v = sorted(self.0.clone()); + for (ctx, code) in v { + writeln!(f, "ctx: {:?} {:?}", ctx, code)?; + } + Ok(()) + } +} + +impl Deref for ContextJumpDests { + type Target = HashMap>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for ContextJumpDests { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Deref for JumpDestTableProcessed { + type Target = HashMap>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut 
for JumpDestTableProcessed { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Deref for JumpDestTableWitness { + type Target = HashMap; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for JumpDestTableWitness { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +#[cfg(test)] +mod test { + use keccak_hash::H256; + + use super::JumpDestTableWitness; + + #[test] + fn test_extend() { + let code_hash = H256::default(); + + let mut table1 = JumpDestTableWitness::default(); + table1.insert(code_hash, 1, 1); + table1.insert(code_hash, 2, 2); + table1.insert(code_hash, 42, 3); + table1.insert(code_hash, 43, 4); + let table2 = table1.clone(); + + let jdts = [&table1, &table2]; + let (actual, max_ctx) = JumpDestTableWitness::merge(jdts); + + let mut expected = JumpDestTableWitness::default(); + expected.insert(code_hash, 1, 1); + expected.insert(code_hash, 2, 2); + expected.insert(code_hash, 42, 3); + expected.insert(code_hash, 43, 4); + expected.insert(code_hash, 44, 1); + expected.insert(code_hash, 45, 2); + expected.insert(code_hash, 85, 3); + expected.insert(code_hash, 86, 4); + + assert_eq!(86, max_ctx); + assert_eq!(expected, actual) + } +} diff --git a/evm_arithmetization/src/generation/mod.rs b/evm_arithmetization/src/generation/mod.rs index 9279e88d9..6ab8e192a 100644 --- a/evm_arithmetization/src/generation/mod.rs +++ b/evm_arithmetization/src/generation/mod.rs @@ -3,6 +3,7 @@ use std::fmt::Display; use anyhow::anyhow; use ethereum_types::{Address, BigEndianHash, H256, U256}; +use jumpdest::JumpDestTableWitness; use keccak_hash::keccak; use log::error; use mpt_trie::partial_trie::{HashedPartialTrie, PartialTrie}; @@ -33,6 +34,7 @@ use crate::util::{h2u, u256_to_usize}; use crate::witness::memory::{MemoryAddress, MemoryChannel, MemoryState}; use crate::witness::state::RegistersState; +pub mod jumpdest; pub(crate) mod linked_list; pub mod mpt; pub(crate) mod prover_input; @@ -130,6 +132,10 @@ 
pub struct GenerationInputs { /// /// This is specific to `cdk-erigon`. pub ger_data: Option<(H256, H256)>, + + /// A table listing each JUMPDESTs reached in each call context under + /// associated code hash. + pub jumpdest_table: Option, } /// A lighter version of [`GenerationInputs`], which have been trimmed @@ -180,6 +186,10 @@ pub struct TrimmedGenerationInputs { /// The hash of the current block, and a list of the 256 previous block /// hashes. pub block_hashes: BlockHashes, + + /// A list of tables listing each JUMPDESTs reached in each call context + /// under associated code hash. + pub jumpdest_table: Option, } #[derive(Clone, Debug, Deserialize, Serialize, Default)] @@ -254,6 +264,7 @@ impl GenerationInputs { burn_addr: self.burn_addr, block_metadata: self.block_metadata.clone(), block_hashes: self.block_hashes.clone(), + jumpdest_table: self.jumpdest_table.clone(), } } } diff --git a/evm_arithmetization/src/generation/prover_input.rs b/evm_arithmetization/src/generation/prover_input.rs index 704e2f4c6..e4206cb15 100644 --- a/evm_arithmetization/src/generation/prover_input.rs +++ b/evm_arithmetization/src/generation/prover_input.rs @@ -6,10 +6,13 @@ use std::str::FromStr; use anyhow::{bail, Error, Result}; use ethereum_types::{BigEndianHash, H256, U256, U512}; use itertools::Itertools; +use keccak_hash::keccak; +use log::{info, trace}; use num_bigint::BigUint; use plonky2::hash::hash_types::RichField; use serde::{Deserialize, Serialize}; +use super::jumpdest::{JumpDestTableProcessed, JumpDestTableWitness}; #[cfg(test)] use super::linked_list::testing::{LinkedList, ADDRESSES_ACCESS_LIST_LEN}; use super::linked_list::{ @@ -22,7 +25,9 @@ use crate::cpu::kernel::constants::cancun_constants::{ POINT_EVALUATION_PRECOMPILE_RETURN_VALUE, }; use crate::cpu::kernel::constants::context_metadata::ContextMetadata; -use crate::cpu::kernel::interpreter::simulate_cpu_and_get_user_jumps; +use crate::cpu::kernel::interpreter::{ + get_jumpdest_analysis_inputs_rpc, 
simulate_cpu_and_get_user_jumps, +}; use crate::curve_pairings::{bls381, CurveAff, CyclicGroup}; use crate::extension_tower::{FieldExt, Fp12, Fp2, BLS381, BLS_BASE, BLS_SCALAR, BN254, BN_BASE}; use crate::generation::prover_input::EvmField::{ @@ -40,6 +45,9 @@ use crate::witness::memory::MemoryAddress; use crate::witness::operation::CONTEXT_SCALING_FACTOR; use crate::witness::util::{current_context_peek, stack_peek}; +/// A set to hold contract code as a byte vectors. +pub type CodeDb = BTreeSet>; + /// Prover input function represented as a scoped function name. /// Example: `PROVER_INPUT(ff::bn254_base::inverse)` is represented as /// `ProverInputFn([ff, bn254_base, inverse])`. @@ -351,9 +359,12 @@ impl GenerationState { } /// Returns the next used jump address. + /// todo fn run_next_jumpdest_table_address(&mut self) -> Result { let context = u256_to_usize(stack_peek(self, 0)? >> CONTEXT_SCALING_FACTOR)?; + // get_code from self.memory + if self.jumpdest_table.is_none() { self.generate_jumpdest_table()?; } @@ -364,12 +375,12 @@ impl GenerationState { )); }; - if let Some(ctx_jumpdest_table) = jumpdest_table.get_mut(&context) + if let Some(ctx_jumpdest_table) = (*jumpdest_table).get_mut(&context) && let Some(next_jumpdest_address) = ctx_jumpdest_table.pop() { Ok((next_jumpdest_address + 1).into()) } else { - jumpdest_table.remove(&context); + (*jumpdest_table).remove(&context); Ok(U256::zero()) } } @@ -383,7 +394,7 @@ impl GenerationState { )); }; - if let Some(ctx_jumpdest_table) = jumpdest_table.get_mut(&context) + if let Some(ctx_jumpdest_table) = (*jumpdest_table).get_mut(&context) && let Some(next_jumpdest_proof) = ctx_jumpdest_table.pop() { Ok(next_jumpdest_proof.into()) @@ -786,7 +797,30 @@ impl GenerationState { fn generate_jumpdest_table(&mut self) -> Result<(), ProgramError> { // Simulate the user's code and (unnecessarily) part of the kernel code, // skipping the validate table call - self.jumpdest_table = 
simulate_cpu_and_get_user_jumps("terminate_common", self); + + // REVIEW: This will be rewritten to only run simulation when + // `self.inputs.jumpdest_table` is `None`. + info!("Generating JUMPDEST tables"); + let rpcw = self.inputs.jumpdest_table.clone(); + let rpcp: Option = rpcw + .as_ref() + .map(|jdt| get_jumpdest_analysis_inputs_rpc(jdt, &self.inputs.contract_code)); + info!("Generating JUMPDEST tables: Running SIM"); + + self.inputs.jumpdest_table = None; + let sims = simulate_cpu_and_get_user_jumps("terminate_common", self); + + let (simp, ref simw): (Option, Option) = + sims.map_or_else(|| (None, None), |(sim, simw)| (Some(sim), Some(simw))); + + info!("Generating JUMPDEST tables: finished"); + + info!("SIMW {:#?}", &simw); + info!("RPCW {:#?}", &rpcw); + info!("SIMP {:#?}", &simp); + info!("RPCP {:#?}", &rpcp); + + self.jumpdest_table = if rpcp.is_some() { rpcp } else { simp }; Ok(()) } @@ -794,13 +828,19 @@ impl GenerationState { /// Given a HashMap containing the contexts and the jumpdest addresses, /// compute their respective proofs, by calling /// `get_proofs_and_jumpdests` - pub(crate) fn set_jumpdest_analysis_inputs( - &mut self, + pub(crate) fn get_jumpdest_analysis_inputs( + &self, jumpdest_table: HashMap>, - ) { - self.jumpdest_table = Some(HashMap::from_iter(jumpdest_table.into_iter().map( + ) -> (JumpDestTableProcessed, JumpDestTableWitness) { + let mut jdtw = JumpDestTableWitness::default(); + let jdtp = JumpDestTableProcessed::new(HashMap::from_iter(jumpdest_table.into_iter().map( |(ctx, jumpdest_table)| { let code = self.get_code(ctx).unwrap(); + let code_hash = keccak(code.clone()); + trace!("ctx: {ctx}, code_hash: {:?} code: {:?}", code_hash, code); + for offset in jumpdest_table.clone() { + jdtw.insert(code_hash, ctx, offset); + } if let Some(&largest_address) = jumpdest_table.last() { let proofs = get_proofs_and_jumpdests(&code, largest_address, jumpdest_table); (ctx, proofs) @@ -809,6 +849,7 @@ impl GenerationState { } }, ))); + 
(jdtp, jdtw) } pub(crate) fn get_current_code(&self) -> Result, ProgramError> { @@ -855,7 +896,7 @@ impl GenerationState { /// for which none of the previous 32 bytes in the code (including opcodes /// and pushed bytes) is a PUSHXX and the address is in its range. It returns /// a vector of even size containing proofs followed by their addresses. -fn get_proofs_and_jumpdests( +pub(crate) fn get_proofs_and_jumpdests( code: &[u8], largest_address: usize, jumpdest_table: std::collections::BTreeSet, diff --git a/evm_arithmetization/src/generation/state.rs b/evm_arithmetization/src/generation/state.rs index 1ea87bd0c..d554ff60f 100644 --- a/evm_arithmetization/src/generation/state.rs +++ b/evm_arithmetization/src/generation/state.rs @@ -8,6 +8,7 @@ use keccak_hash::keccak; use log::Level; use plonky2::hash::hash_types::RichField; +use super::jumpdest::JumpDestTableProcessed; use super::linked_list::LinkedListsPtrs; use super::mpt::TrieRootPtrs; use super::segments::GenerationSegmentData; @@ -374,7 +375,7 @@ pub struct GenerationState { /// "proof" for a jump destination is either 0 or an address i > 32 in /// the code (not necessarily pointing to an opcode) such that for every /// j in [i, i+32] it holds that code[j] < 0x7f - j + i. - pub(crate) jumpdest_table: Option>>, + pub(crate) jumpdest_table: Option, /// Provides quick access to pointers that reference the location /// of either and account or a slot in the respective access list. 
diff --git a/evm_arithmetization/src/lib.rs b/evm_arithmetization/src/lib.rs index 1a6a4a38e..fae3778d8 100644 --- a/evm_arithmetization/src/lib.rs +++ b/evm_arithmetization/src/lib.rs @@ -210,6 +210,9 @@ pub mod verifier; pub mod generation; pub mod witness; +pub use generation::jumpdest; +pub use generation::prover_input::CodeDb; + // Utility modules pub mod curve_pairings; pub mod extension_tower; diff --git a/evm_arithmetization/tests/add11_yml.rs b/evm_arithmetization/tests/add11_yml.rs index dd9bfb1ce..5c955d482 100644 --- a/evm_arithmetization/tests/add11_yml.rs +++ b/evm_arithmetization/tests/add11_yml.rs @@ -200,6 +200,7 @@ fn get_generation_inputs() -> GenerationInputs { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), } } /// The `add11_yml` test case from https://github.com/ethereum/tests diff --git a/evm_arithmetization/tests/erc20.rs b/evm_arithmetization/tests/erc20.rs index 2baf716e7..b86dcb59f 100644 --- a/evm_arithmetization/tests/erc20.rs +++ b/evm_arithmetization/tests/erc20.rs @@ -195,6 +195,7 @@ fn test_erc20() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/erc721.rs b/evm_arithmetization/tests/erc721.rs index e416dc87a..ba139f6d2 100644 --- a/evm_arithmetization/tests/erc721.rs +++ b/evm_arithmetization/tests/erc721.rs @@ -199,6 +199,7 @@ fn test_erc721() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/global_exit_root.rs b/evm_arithmetization/tests/global_exit_root.rs index b7e82c7a0..c1f656efe 100644 --- a/evm_arithmetization/tests/global_exit_root.rs +++ b/evm_arithmetization/tests/global_exit_root.rs @@ -113,6 +113,7 @@ fn test_global_exit_root() -> anyhow::Result<()> { 
prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/log_opcode.rs b/evm_arithmetization/tests/log_opcode.rs index 81daf588c..afceefca3 100644 --- a/evm_arithmetization/tests/log_opcode.rs +++ b/evm_arithmetization/tests/log_opcode.rs @@ -266,6 +266,7 @@ fn test_log_opcodes() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/selfdestruct.rs b/evm_arithmetization/tests/selfdestruct.rs index f97dd41cd..6a13ace55 100644 --- a/evm_arithmetization/tests/selfdestruct.rs +++ b/evm_arithmetization/tests/selfdestruct.rs @@ -170,6 +170,7 @@ fn test_selfdestruct() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/simple_transfer.rs b/evm_arithmetization/tests/simple_transfer.rs index a1e7fb655..56c8ba0ae 100644 --- a/evm_arithmetization/tests/simple_transfer.rs +++ b/evm_arithmetization/tests/simple_transfer.rs @@ -162,6 +162,7 @@ fn test_simple_transfer() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/withdrawals.rs b/evm_arithmetization/tests/withdrawals.rs index 01b48c0c6..d2d461030 100644 --- a/evm_arithmetization/tests/withdrawals.rs +++ b/evm_arithmetization/tests/withdrawals.rs @@ -105,6 +105,7 @@ fn test_withdrawals() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: Default::default(), }; let max_cpu_len_log = 20; diff --git a/scripts/prove_stdio.sh b/scripts/prove_stdio.sh index 7249dce92..077ce9261 100755 --- a/scripts/prove_stdio.sh +++ 
b/scripts/prove_stdio.sh @@ -17,27 +17,30 @@ else num_procs=$(nproc) fi -# Force the working directory to always be the `tools/` directory. +# Force the working directory to always be the `tools/` directory. REPO_ROOT=$(git rev-parse --show-toplevel) PROOF_OUTPUT_DIR="${REPO_ROOT}/proofs" -BLOCK_BATCH_SIZE="${BLOCK_BATCH_SIZE:-8}" +BLOCK_BATCH_SIZE="${BLOCK_BATCH_SIZE:-1}" echo "Block batch size: $BLOCK_BATCH_SIZE" +BATCH_SIZE=${BATCH_SIZE:-1} +echo "Batch size: $BATCH_SIZE" + OUTPUT_LOG="${REPO_ROOT}/output.log" PROOFS_FILE_LIST="${PROOF_OUTPUT_DIR}/proof_files.json" -TEST_OUT_PATH="${REPO_ROOT}/test.out" +TEST_OUT_PATH="${REPO_ROOT}/$3.test.out" # Configured Rayon and Tokio with rough defaults export RAYON_NUM_THREADS=$num_procs export TOKIO_WORKER_THREADS=$num_procs -export RUST_MIN_STACK=33554432 +#export RUST_MIN_STACK=33554432 export RUST_BACKTRACE=full export RUST_LOG=info # Script users are running locally, and might benefit from extra perf. # See also .cargo/config.toml. -export RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld' +export RUSTFLAGS='-C target-cpu=native -Z linker-features=-lld' INPUT_FILE=$1 TEST_ONLY=$2 @@ -95,23 +98,25 @@ fi # proof. This is useful for quickly testing decoding and all of the # other non-proving code. if [[ $TEST_ONLY == "test_only" ]]; then - cargo run --quiet --release --package zero --bin leader -- --test-only --runtime in-memory --load-strategy on-demand --block-batch-size $BLOCK_BATCH_SIZE --proof-output-dir $PROOF_OUTPUT_DIR stdio < $INPUT_FILE &> $TEST_OUT_PATH + cargo run --release --package zero --bin leader -- --test-only --runtime in-memory --load-strategy on-demand --block-batch-size $BLOCK_BATCH_SIZE --proof-output-dir $PROOF_OUTPUT_DIR --batch-size $BATCH_SIZE --save-inputs-on-error stdio < $INPUT_FILE |& tee &> $TEST_OUT_PATH if grep -q 'All proof witnesses have been generated successfully.' 
$TEST_OUT_PATH; then echo -e "\n\nSuccess - Note this was just a test, not a proof" - rm $TEST_OUT_PATH + #rm $TEST_OUT_PATH exit - else - echo "Failed to create proof witnesses. See \"zk_evm/tools/test.out\" for more details." + elif grep -q 'Proving task finished with error' $TEST_OUT_PATH; then + echo -e "\n\nFailed to create proof witnesses. See \"zk_evm/test.out\" for more details." exit 1 + else + echo -e "\n\nUndecided. Proving process has stopped but verdict is undecided. See \"zk_evm/test.out\" for more details." + exit 2 fi fi cargo build --release --jobs "$num_procs" - start_time=$(date +%s%N) "${REPO_ROOT}/target/release/leader" --runtime in-memory --load-strategy on-demand --block-batch-size $BLOCK_BATCH_SIZE \ - --proof-output-dir $PROOF_OUTPUT_DIR stdio < $INPUT_FILE &> $OUTPUT_LOG + --proof-output-dir $PROOF_OUTPUT_DIR stdio < $INPUT_FILE |& tee $OUTPUT_LOG end_time=$(date +%s%N) set +o pipefail diff --git a/scripts/test_jerigon.sh b/scripts/test_jerigon.sh new file mode 100755 index 000000000..8e02977c8 --- /dev/null +++ b/scripts/test_jerigon.sh @@ -0,0 +1,329 @@ +#!/usr/bin/env bash + +set -uo pipefail + +RPC=${RPC_JERIGON} +if [ -z $RPC ]; then + # You must set an RPC endpoint + exit 1 +fi +mkdir -p witnesses + +# Must match the values in prove_stdio.sh or build is dirty. +export RAYON_NUM_THREADS=1 +export TOKIO_WORKER_THREADS=1 +export RUST_BACKTRACE=full +#export RUST_LOG=info +#export RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld' +#export RUST_MIN_STACK=67108864 + +GITHASH=`git rev-parse --short HEAD` +echo "Testing against jergion, current revision: $GITHASH." 
+ +CIBLOCKS=" +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +" + + +STILLFAIL=" +37 +75 +15 +35 +43 +72 +77 +184 +460 +461 +462 +463 +464 +465 +467 +468 +474 +475 +476 +566 +662 +664 +665 +667 +670 +477 +478 +444 +" + +JUMPI=" +662 +664 +665 +667 +670 +" + +CONTAINSKEY=" +461 +462 +463 +464 +465 +467 +468 +474 +475 +476 +72 +" + +CREATE2=" +43 +566 +77 +" + +DECODING=" +477 +478 +" + +USEDTOFAIL=" +2 +15 +28 +35 +37 +43 +65 + +28 + +43 +460 +461 +462 +463 +464 +465 +467 +468 +474 +475 +476 +566 +662 +664 +665 +667 +670 +72 +77 +" + +ROUND2=" +664 +667 +670 +665 +" + +NOWSUCCESS=" +444 +4 +5 +28 +65 +566 +15 +35 +" + +ROUND3=" +125 +127 +131 +132 +136 +141 +142 +143 +145 +149 +150 +151 +153 +154 +186 +187 +188 +190 +193 +195 +197 +199 +201 +214 +220 +221 +222 +223 +226 +228 +229 +230 +231 +232 +234 +242 +256 +257 +258 +262 +264 +267 +268 +282 +284 +285 +287 +292 +294 +295 +301 +303 +304 +321 +325 +333 +460 +461 +462 +463 +464 +465 +466 +467 +468 +473 +474 +528 +529 +530 +531 +532 +533 +534 +566 +570 +664 +77 +548 +" + +ROUND4=" +136 +186 +268 +282 +301 +304 +321 +333 +460 +461 +462 +463 +464 +465 +466 +467 +468 +473 +474 +528 +529 +530 +531 +532 +533 +534 +570 +664 +" + +ROUND5=" +460 +461 +462 +463 +464 +465 +466 +467 +468 +473 +474 +664 +" + +ROUND6=" +664 +" + +# 470..663 from Robin +for i in {470..663} +do + ROBIN+=" $i" +done + +TIP=688 +NUMRANDOMBLOCKS=10 +RANDOMBLOCKS=`shuf --input-range=0-$TIP -n $NUMRANDOMBLOCKS | sort` + +#$CREATE2 $DECODING $CONTAINSKEY $USEDTOFAIL $STILLFAIL $CIBLOCKS $JUMPI $ROUND2 $RANDOMBLOCKS $ROUND3" +BLOCKS="$ROUND6" +BLOCKS=`echo $BLOCKS | tr ' ' '\n' | sort -nu | tr '\n' ' '` + +echo "Testing: $BLOCKS" +printf "\ngithash block verdict duration\n" | tee -a witnesses/jerigon_results.txt +echo "------------------------------------" | tee -a witnesses/jerigon_results.txt + +for BLOCK in $BLOCKS; do + GITHASH=`git rev-parse --short HEAD` + WITNESS="witnesses/$BLOCK.jerigon.$GITHASH.witness.json" + echo "Fetching block $BLOCK" 
+ export RUST_LOG=rpc=trace + SECONDS=0 + cargo run --quiet --release --bin rpc -- --backoff 3000 --max-retries 100 --rpc-url $RPC --rpc-type jerigon --jumpdest-src client-fetched-structlogs fetch --start-block $BLOCK --end-block $BLOCK 1> $WITNESS + echo "Testing blocks: $BLOCKS." + echo "Now testing block $BLOCK .." + export RUST_LOG=info + timeout 10m ./prove_stdio.sh $WITNESS test_only $BLOCK + EXITCODE=$? + DURATION=`date -u -d @"$SECONDS" +'%-Hh%-Mm%-Ss'` + echo $DURATION + if [ $EXITCODE -eq 0 ] + then + VERDICT="success" + else + VERDICT="failure" + fi + printf "%s %10i %s %s\n" $GITHASH $BLOCK $VERDICT $DURATION | tee -a witnesses/jerigon_results.txt +done diff --git a/scripts/test_native.sh b/scripts/test_native.sh new file mode 100755 index 000000000..7f092f232 --- /dev/null +++ b/scripts/test_native.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +set -uo pipefail + +RPC=${RPC_NATIVE} +if [ -z $RPC ]; then + # You must set an RPC endpoint + exit 1 +fi +mkdir -p witnesses + +# Must match the values in prove_stdio.sh or build is dirty. +#export RAYON_NUM_THREADS=1 +#export TOKIO_WORKER_THREADS=1 +export RUST_BACKTRACE=full +#export RUST_LOG=info +#export RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld' +#export RUST_MIN_STACK=33554432 + + + +CANCUNBLOCKS=" +20548415 +20240058 +19665756 +20634472 +19807080 +20634403 +" + +PRECANCUN=" +19096840 +19240700 +" + +#It's visible with block 20727641 +ROUND1=`echo {20727640..20727650}` + + + +CANCUN=19426587 +TIP=`cast block-number --rpc-url $RPC` +STATICTIP=20721266 +NUMRANDOMBLOCKS=100 +RANDOMBLOCKS=`shuf --input-range=$CANCUN-$TIP -n $NUMRANDOMBLOCKS | sort` + +GITHASH=`git rev-parse --short HEAD` +echo "Testing against mainnet, current revision: $GITHASH." 
+ +#BLOCKS="$CANCUNBLOCKS $RANDOMBLOCKS" +BLOCKS="$ROUND1" +#BLOCKS="$CANCUNBLOCKS" +echo "Testing blocks: $BLOCKS" + +echo "Testing: $BLOCKS" +printf "githash block verdict duration\n" | tee -a witnesses/native_results.txt +echo "------------------------------------" | tee -a witnesses/native_results.txt + +for BLOCK in $BLOCKS; do + GITHASH=`git rev-parse --short HEAD` + WITNESS="witnesses/$BLOCK.native.$GITHASH.witness.json" + echo "Fetching block $BLOCK" + export RUST_LOG=rpc=trace + cargo run --quiet --release --bin rpc -- --backoff 3000 --max-retries 100 --rpc-url $RPC --rpc-type native fetch --start-block $BLOCK --end-block $BLOCK 1> $WITNESS + echo "Testing blocks: $BLOCKS." + echo "Now testing block $BLOCK .." + export RUST_LOG=info + SECONDS=0 + timeout 30m ./prove_stdio.sh $WITNESS test_only + EXITCODE=$? + DURATION=`date -u -d @"$SECONDS" +'%-Hh%-Mm%-Ss'` + echo $DURATION + if [ $EXITCODE -eq 0 ] + then + VERDICT="success" + else + VERDICT="failure" + fi + printf "%s %10i %s %s\n" $GITHASH $BLOCK $VERDICT $DURATION | tee -a witnesses/native_results.txt +done diff --git a/trace_decoder/src/core.rs b/trace_decoder/src/core.rs index d96778dec..4b3303ddd 100644 --- a/trace_decoder/src/core.rs +++ b/trace_decoder/src/core.rs @@ -1,15 +1,21 @@ +use core::{convert::Into as _, option::Option::None}; use std::{ cmp, collections::{BTreeMap, BTreeSet, HashMap}, mem, }; -use alloy::primitives::address; +use alloy::{ + consensus::{Transaction, TxEnvelope}, + primitives::{address, TxKind}, + rlp::Decodable as _, +}; use alloy_compat::Compat as _; use anyhow::{anyhow, bail, ensure, Context as _}; use ethereum_types::{Address, H160, U256}; use evm_arithmetization::{ generation::{mpt::AccountRlp, TrieInputs}, + jumpdest::JumpDestTableWitness, proof::{BlockMetadata, TrieRoots}, GenerationInputs, }; @@ -27,6 +33,23 @@ use crate::{ TxnInfo, TxnMeta, TxnTrace, }; +/// Addresses of precompiled Ethereum contracts. 
+pub fn is_precompile(addr: H160) -> bool { + let precompiled_addresses = if cfg!(feature = "eth_mainnet") { + address!("0000000000000000000000000000000000000001") + ..address!("000000000000000000000000000000000000000a") + } else { + // Remove KZG Peval for non-Eth mainnet networks + address!("0000000000000000000000000000000000000001") + ..address!("0000000000000000000000000000000000000009") + }; + precompiled_addresses.contains(&addr.compat()) + || (cfg!(feature = "polygon_pos") + // Include P256Verify for Polygon PoS + && addr.compat() + == address!("0000000000000000000000000000000000000100")) +} + /// TODO(0xaatif): document this after https://github.com/0xPolygonZero/zk_evm/issues/275 pub fn entrypoint( trace: BlockTrace, @@ -42,7 +65,7 @@ pub fn entrypoint( txn_info, } = trace; let (state, storage, mut code) = start(trie_pre_images)?; - code.extend(code_db); + code.extend(code_db.clone()); let OtherBlockData { b_data: @@ -90,6 +113,7 @@ pub fn entrypoint( }, after, withdrawals, + jumpdest_tables, }| GenerationInputs { txn_number_before: first_txn_ix.into(), gas_used_before: running_gas_used.into(), @@ -97,7 +121,7 @@ pub fn entrypoint( running_gas_used += gas_used; running_gas_used.into() }, - signed_txns: byte_code.into_iter().map(Into::into).collect(), + signed_txns: byte_code.clone().into_iter().map(Into::into).collect(), withdrawals, ger_data, tries: TrieInputs { @@ -109,13 +133,57 @@ pub fn entrypoint( trie_roots_after: after, checkpoint_state_trie_root, checkpoint_consolidated_hash, - contract_code: contract_code - .into_iter() - .map(|it| (keccak_hash::keccak(&it), it)) - .collect(), + contract_code: { + let initcodes = + byte_code + .iter() + .filter_map(|nonempty_txn_bytes| -> Option> { + let tx_envelope = + TxEnvelope::decode(&mut &nonempty_txn_bytes[..]).unwrap(); + match tx_envelope.to() { + TxKind::Create => Some(tx_envelope.input().to_vec()), + TxKind::Call(_address) => None, + } + }); + + // TODO convert to Hash2Code + let initmap: HashMap<_, _> 
= initcodes + .into_iter() + .map(|it| (keccak_hash::keccak(&it), it)) + .collect(); + log::trace!("Initmap {:?}", initmap); + + let contractmap: HashMap<_, _> = contract_code + .into_iter() + .map(|it| (keccak_hash::keccak(&it), it)) + .collect(); + log::trace!("Contractmap {:?}", contractmap); + + let codemap: HashMap<_, _> = code_db + .clone() + .into_iter() + .map(|it| (keccak_hash::keccak(&it), it)) + .collect(); + log::trace!("Codemap {:?}", codemap); + + let mut res = codemap; + res.extend(contractmap); + res.extend(initmap); + res + }, block_metadata: b_meta.clone(), block_hashes: b_hashes.clone(), burn_addr, + jumpdest_table: { + // TODO See the issue Simulate to get jumpdests on a per-transaction basis #653. + // Note that this causes any batch containing just a single `None` to collapse + // into a `None`, which causing failover to simulating jumpdest analysis for the + // whole batch. There is an optimization opportunity here. + jumpdest_tables + .into_iter() + .collect::>>() + .map(|jdt| JumpDestTableWitness::merge(jdt.iter()).0) + }, }, ) .collect()) @@ -259,6 +327,8 @@ struct Batch { /// Empty for all but the final batch pub withdrawals: Vec<(Address, U256)>, + + pub jumpdest_tables: Vec>, } /// [`evm_arithmetization::generation::TrieInputs`], @@ -342,6 +412,8 @@ fn middle( )?; } + let mut jumpdest_tables = vec![]; + for txn in batch { let do_increment_txn_ix = txn.is_some(); let TxnInfo { @@ -351,6 +423,7 @@ fn middle( byte_code, new_receipt_trie_node_byte, gas_used: txn_gas_used, + jumpdest_table, }, } = txn.unwrap_or_default(); @@ -463,25 +536,7 @@ fn middle( state_trie.insert_by_address(addr, acct)?; state_mask.insert(TrieKey::from_address(addr)); } else { - // Simple state access - - fn is_precompile(addr: H160) -> bool { - let precompiled_addresses = if cfg!(feature = "eth_mainnet") { - address!("0000000000000000000000000000000000000001") - ..address!("000000000000000000000000000000000000000a") - } else { - // Remove KZG Peval for non-Eth 
mainnet networks
-                            address!("0000000000000000000000000000000000000001")
-                                ..address!("0000000000000000000000000000000000000009")
-                        };
-
-                        precompiled_addresses.contains(&addr.compat())
-                            || (cfg!(feature = "polygon_pos")
-                            // Include P256Verify for Polygon PoS
-                                && addr.compat()
-                                    == address!("0000000000000000000000000000000000000100"))
-                    }
-
+                    // Simple state access
                     if receipt.status || !is_precompile(addr) {
                         // TODO(0xaatif): https://github.com/0xPolygonZero/zk_evm/pull/613
                         //                masking like this SHOULD be a space-saving optimization,
@@ -496,6 +551,8 @@
             }
         }
 
+        jumpdest_tables.push(jumpdest_table);
+
         if do_increment_txn_ix {
             txn_ix += 1;
         }
@@ -548,6 +605,7 @@
                 transactions_root: transaction_trie.root(),
                 receipts_root: receipt_trie.root(),
             },
+            jumpdest_tables,
         });
 
         observer.collect_tries(
@@ -784,7 +842,7 @@ impl Hash2Code {
     pub fn get(&mut self, hash: H256) -> anyhow::Result<Vec<u8>> {
         match self.inner.get(&hash) {
             Some(code) => Ok(code.clone()),
-            None => bail!("no code for hash {}", hash),
+            None => bail!("no code for hash {:#x}", hash),
         }
     }
     pub fn insert(&mut self, code: Vec<u8>) {
diff --git a/trace_decoder/src/interface.rs b/trace_decoder/src/interface.rs
index abe3b0af0..35248f9a3 100644
--- a/trace_decoder/src/interface.rs
+++ b/trace_decoder/src/interface.rs
@@ -5,6 +5,7 @@
 use std::collections::{BTreeMap, BTreeSet, HashMap};
 
 use ethereum_types::{Address, U256};
+use evm_arithmetization::jumpdest::JumpDestTableWitness;
 use evm_arithmetization::proof::{BlockHashes, BlockMetadata};
 use evm_arithmetization::ConsolidatedHash;
 use keccak_hash::H256;
@@ -111,6 +112,9 @@ pub struct TxnMeta {
 
     /// Gas used by this txn (Note: not cumulative gas used).
     pub gas_used: u64,
+
+    /// JUMPDEST table for this transaction, if one was generated.
+    pub jumpdest_table: Option<JumpDestTableWitness>,
 }
 
 /// A "trace" specific to an account for a txn.
diff --git a/trace_decoder/src/lib.rs b/trace_decoder/src/lib.rs index 049472c40..bf7ff71cc 100644 --- a/trace_decoder/src/lib.rs +++ b/trace_decoder/src/lib.rs @@ -66,6 +66,7 @@ mod typed_mpt; mod wire; pub use core::entrypoint; +pub use core::is_precompile; mod core; diff --git a/trace_decoder/src/typed_mpt.rs b/trace_decoder/src/typed_mpt.rs index 8baf3cf29..f9c1467ab 100644 --- a/trace_decoder/src/typed_mpt.rs +++ b/trace_decoder/src/typed_mpt.rs @@ -280,6 +280,7 @@ pub trait StateTrie { ) -> anyhow::Result>; fn insert_hash_by_key(&mut self, key: TrieKey, hash: H256) -> anyhow::Result<()>; fn get_by_address(&self, address: Address) -> Option; + #[allow(dead_code)] fn reporting_remove(&mut self, address: Address) -> anyhow::Result>; /// _Hash out_ parts of the trie that aren't in `txn_ixs`. fn mask(&mut self, address: impl IntoIterator) -> anyhow::Result<()>; diff --git a/zero/Cargo.toml b/zero/Cargo.toml index 5ccb57c96..34b23f2c6 100644 --- a/zero/Cargo.toml +++ b/zero/Cargo.toml @@ -12,6 +12,8 @@ categories.workspace = true __compat_primitive_types = { workspace = true } alloy = { workspace = true } alloy-compat = "0.1.0" +alloy-primitives = { workspace = true } +alloy-serde = { workspace = true } anyhow = { workspace = true } async-stream = { workspace = true } axum = { workspace = true } diff --git a/zero/src/bin/leader.rs b/zero/src/bin/leader.rs index 728f6bb0e..775439fde 100644 --- a/zero/src/bin/leader.rs +++ b/zero/src/bin/leader.rs @@ -72,6 +72,7 @@ async fn main() -> Result<()> { Command::Rpc { rpc_url, rpc_type, + jumpdest_src, block_interval, checkpoint_block_number, previous_proof, @@ -91,6 +92,7 @@ async fn main() -> Result<()> { backoff, max_retries, block_time, + jumpdest_src, }, block_interval, LeaderConfig { diff --git a/zero/src/bin/leader/cli.rs b/zero/src/bin/leader/cli.rs index 3569efc41..d89a349ea 100644 --- a/zero/src/bin/leader/cli.rs +++ b/zero/src/bin/leader/cli.rs @@ -4,7 +4,7 @@ use alloy::transports::http::reqwest::Url; use 
clap::{Parser, Subcommand, ValueHint}; use zero::prover::cli::CliProverConfig; use zero::prover_state::cli::CliProverStateConfig; -use zero::rpc::RpcType; +use zero::rpc::{JumpdestSrc, RpcType}; /// zero-bin leader config #[derive(Parser)] @@ -43,6 +43,9 @@ pub(crate) enum Command { // The node RPC type (jerigon / native). #[arg(long, short = 't', default_value = "jerigon")] rpc_type: RpcType, + /// The source of jumpdest tables. + #[arg(short = 'j', long, default_value_ifs = [("rpc_type", "jerigon", "prover-simulation"), ("rpc_type", "native", "client-fetched-structlogs")], required = false)] + jumpdest_src: JumpdestSrc, /// The block interval for which to generate a proof. #[arg(long, short = 'i')] block_interval: String, diff --git a/zero/src/bin/leader/client.rs b/zero/src/bin/leader/client.rs index 455cceb8b..bd01fcdcd 100644 --- a/zero/src/bin/leader/client.rs +++ b/zero/src/bin/leader/client.rs @@ -10,7 +10,7 @@ use zero::block_interval::{BlockInterval, BlockIntervalStream}; use zero::pre_checks::check_previous_proof_and_checkpoint; use zero::proof_types::GeneratedBlockProof; use zero::prover::{self, BlockProverInput, ProverConfig}; -use zero::rpc; +use zero::rpc::{self, JumpdestSrc}; use zero::rpc::{retry::build_http_retry_provider, RpcType}; #[derive(Debug)] @@ -20,6 +20,7 @@ pub struct RpcParams { pub backoff: u64, pub max_retries: u32, pub block_time: u64, + pub jumpdest_src: JumpdestSrc, } #[derive(Debug)] @@ -92,6 +93,7 @@ pub(crate) async fn client_main( block_id, leader_config.checkpoint_block_number, rpc_params.rpc_type, + rpc_params.jumpdest_src, ) .await?; block_tx diff --git a/zero/src/bin/rpc.rs b/zero/src/bin/rpc.rs index d49cdde5c..fe29247ee 100644 --- a/zero/src/bin/rpc.rs +++ b/zero/src/bin/rpc.rs @@ -16,6 +16,7 @@ use zero::block_interval::BlockIntervalStream; use zero::prover::BlockProverInput; use zero::provider::CachedProvider; use zero::rpc; +use zero::rpc::JumpdestSrc; use self::rpc::{retry::build_http_retry_provider, RpcType}; @@ 
-25,6 +26,7 @@ struct FetchParams { pub end_block: u64, pub checkpoint_block_number: Option, pub rpc_type: RpcType, + pub jumpdest_src: JumpdestSrc, } #[derive(Args, Clone, Debug)] @@ -35,6 +37,9 @@ struct RpcToolConfig { /// The RPC Tracer Type. #[arg(short = 't', long, default_value = "jerigon")] rpc_type: RpcType, + /// The source of jumpdest tables. + #[arg(short = 'j', long, default_value_ifs = [("rpc_type", "jerigon", "prover-simulation"), ("rpc_type", "native", "client-fetched-structlogs")], required = false)] + jumpdest_src: JumpdestSrc, /// Backoff in milliseconds for retry requests. #[arg(long, default_value_t = 0)] backoff: u64, @@ -102,6 +107,7 @@ where block_id, checkpoint_block_number, params.rpc_type, + params.jumpdest_src, ) .await?; @@ -130,6 +136,7 @@ impl Cli { end_block, checkpoint_block_number, rpc_type: self.config.rpc_type, + jumpdest_src: self.config.jumpdest_src, }; let block_prover_inputs = @@ -156,6 +163,7 @@ impl Cli { end_block: block_number, checkpoint_block_number: None, rpc_type: self.config.rpc_type, + jumpdest_src: self.config.jumpdest_src, }; let block_prover_inputs = @@ -208,6 +216,9 @@ async fn main() -> anyhow::Result<()> { tracing_subscriber::Registry::default() .with( tracing_subscriber::fmt::layer() + // With the default configuration trace information is written + // to stdout, but we already use stdout to write our payload (the witness). 
+ .with_writer(std::io::stderr) .with_ansi(false) .compact() .with_filter(EnvFilter::from_default_env()), diff --git a/zero/src/rpc/jerigon.rs b/zero/src/rpc/jerigon.rs index df00bc605..73280fc99 100644 --- a/zero/src/rpc/jerigon.rs +++ b/zero/src/rpc/jerigon.rs @@ -1,13 +1,30 @@ -use alloy::{providers::Provider, rpc::types::eth::BlockId, transports::Transport}; +use core::iter::Iterator; +use std::collections::BTreeMap; +use std::ops::Deref as _; + +use __compat_primitive_types::H160; +use alloy::{ + providers::Provider, + rpc::types::{eth::BlockId, trace::geth::StructLog, Block, BlockTransactionsKind, Transaction}, + transports::Transport, +}; +use alloy_primitives::Address; use anyhow::Context as _; +use evm_arithmetization::{jumpdest::JumpDestTableWitness, CodeDb}; +use futures::stream::FuturesOrdered; +use futures::StreamExt as _; use serde::Deserialize; use serde_json::json; -use trace_decoder::{BlockTrace, BlockTraceTriePreImages, CombinedPreImages, TxnInfo}; +use trace_decoder::{BlockTrace, BlockTraceTriePreImages, CombinedPreImages, TxnInfo, TxnTrace}; +use tracing::info; -use super::fetch_other_block_data; +use super::{ + fetch_other_block_data, + jumpdest::{self, get_normalized_structlog}, + JumpdestSrc, +}; use crate::prover::BlockProverInput; use crate::provider::CachedProvider; - /// Transaction traces retrieved from Erigon zeroTracer. #[derive(Debug, Deserialize)] pub struct ZeroTxResult { @@ -20,6 +37,7 @@ pub async fn block_prover_input( cached_provider: std::sync::Arc>, target_block_id: BlockId, checkpoint_block_number: u64, + jumpdest_src: JumpdestSrc, ) -> anyhow::Result where ProviderT: Provider, @@ -33,16 +51,50 @@ where "debug_traceBlockByNumber".into(), (target_block_id, json!({"tracer": "zeroTracer"})), ) - .await?; + .await? + .into_iter() + .map(|ztr| ztr.result) + .collect::>(); // Grab block witness info (packed as combined trie pre-images) - let block_witness = cached_provider .get_provider() .await? 
.raw_request::<_, String>("eth_getWitness".into(), vec![target_block_id]) .await?; + let block = cached_provider + .get_block(target_block_id, BlockTransactionsKind::Full) + .await?; + + let jdts: Vec> = match jumpdest_src { + JumpdestSrc::ProverSimulation => vec![None; tx_results.len()], + JumpdestSrc::ClientFetchedStructlogs => { + process_transactions( + &block, + cached_provider.get_provider().await?.deref(), + tx_results.iter().map(|TxnInfo { traces, meta: _ }| traces), // &tx_traces, + ) + .await? + } + JumpdestSrc::ServerFetchedStructlogs => todo!("hybrid server bulk struct log retrieval/local jumpdest table generation not yet implemented"), + JumpdestSrc::Serverside => todo!(), + }; + + let mut code_db = CodeDb::default(); + // weave in the JDTs + let txn_info = tx_results + .into_iter() + .zip(jdts) + .map(|(mut tx_info, jdt)| { + tx_info.meta.jumpdest_table = jdt.map(|(j, c)| { + code_db.extend(c); + j + }); + tx_info + }) + .collect(); + let other_data = fetch_other_block_data(cached_provider, target_block_id, checkpoint_block_number).await?; @@ -53,9 +105,82 @@ where compact: hex::decode(block_witness.strip_prefix("0x").unwrap_or(&block_witness)) .context("invalid hex returned from call to eth_getWitness")?, }), - txn_info: tx_results.into_iter().map(|it| it.result).collect(), - code_db: Default::default(), + txn_info, + code_db, }, other_data, }) } + +/// Processes the transactions in the given block and updates the code db. +pub async fn process_transactions<'i, I, ProviderT, TransportT>( + block: &Block, + provider: &ProviderT, + tx_traces: I, +) -> anyhow::Result>> +where + ProviderT: Provider, + TransportT: Transport + Clone, + I: Iterator>, +{ + let futures = block + .transactions + .as_transactions() + .context("No transactions in block")? 
+ .iter() + .zip(tx_traces) + .map(|(tx, tx_trace)| process_transaction(provider, tx, tx_trace)) + .collect::>(); + + futures + .collect::>() + .await + .into_iter() + .collect::, _>>() +} + +/// Processes the transaction with the given transaction hash and updates the +/// accounts state. +pub async fn process_transaction( + provider: &ProviderT, + tx: &Transaction, + tx_trace: &BTreeMap, +) -> anyhow::Result> +where + ProviderT: Provider, + TransportT: Transport + Clone, +{ + let tx_traces = tx_trace + .iter() + .map(|(h, t)| (Address::from(h.to_fixed_bytes()), t.clone())) + .collect(); + + let structlog_opt: Option> = get_normalized_structlog(provider, &tx.hash) + .await + .ok() + .flatten(); + + let jc: Option<(JumpDestTableWitness, CodeDb)> = structlog_opt.and_then(|struct_log| { + jumpdest::generate_jumpdest_table(tx, &struct_log, &tx_traces).map_or_else( + |error| { + info!( + "{:#?}: JumpDestTable generation failed with reason: {}", + tx.hash, error + ); + None + }, + |(jdt, code_db)| { + info!( + "{:#?}: JumpDestTable generation succeeded with result: {}", + tx.hash, jdt + ); + Some((jdt, code_db)) + }, + ) + }); + + // TODO + // let jumpdest_table = jc.map(|(j, c)| j); + + Ok(jc) +} diff --git a/zero/src/rpc/jumpdest.rs b/zero/src/rpc/jumpdest.rs new file mode 100644 index 000000000..3f5057b27 --- /dev/null +++ b/zero/src/rpc/jumpdest.rs @@ -0,0 +1,488 @@ +use core::default::Default; +use core::option::Option::None; +use core::str::FromStr as _; +use core::time::Duration; +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::ops::Not as _; + +use __compat_primitive_types::H160; +use __compat_primitive_types::H256; +use alloy::primitives::Address; +use alloy::primitives::U160; +use alloy::providers::ext::DebugApi; +use alloy::providers::Provider; +use alloy::rpc::types::eth::Transaction; +use alloy::rpc::types::trace::geth::GethTrace; +use alloy::rpc::types::trace::geth::StructLog; +use 
alloy::rpc::types::trace::geth::{GethDebugTracingOptions, GethDefaultTracingOptions};
+use alloy::transports::RpcError;
+use alloy::transports::Transport;
+use alloy::transports::TransportErrorKind;
+use alloy_primitives::B256;
+use alloy_primitives::U256;
+use anyhow::ensure;
+use evm_arithmetization::jumpdest::JumpDestTableWitness;
+use evm_arithmetization::CodeDb;
+use keccak_hash::keccak;
+use tokio::time::timeout;
+use trace_decoder::is_precompile;
+use trace_decoder::ContractCodeUsage;
+use trace_decoder::TxnTrace;
+use tracing::trace;
+
+/// The maximum time we are willing to wait for a structlog before failing over
+/// to simulating the JumpDest analysis.
+const TIMEOUT_LIMIT: Duration = Duration::from_secs(10 * 60);
+
+/// Structure of Ethereum memory
+type Word = [u8; 32];
+const WORDSIZE: usize = std::mem::size_of::<Word>();
+
+/// Pass `true` for the components needed.
+fn structlog_tracing_options(stack: bool, memory: bool, storage: bool) -> GethDebugTracingOptions {
+    GethDebugTracingOptions {
+        config: GethDefaultTracingOptions {
+            disable_stack: Some(!stack),
+            // needed for CREATE2
+            disable_memory: Some(!memory),
+            disable_storage: Some(!storage),
+            ..GethDefaultTracingOptions::default()
+        },
+        tracer: None,
+        ..GethDebugTracingOptions::default()
+    }
+}
+
+/// Get code hash from a read or write operation of contract code.
+fn get_code_hash(usage: &ContractCodeUsage) -> H256 {
+    match usage {
+        ContractCodeUsage::Read(hash) => *hash,
+        ContractCodeUsage::Write(bytes) => keccak(bytes),
+    }
+}
+
+/// Predicate that determines whether a `StructLog` that includes memory is
+/// required.
+fn trace_contains_create(structlog: &[StructLog]) -> bool {
+    structlog
+        .iter()
+        .any(|entry| entry.op == "CREATE" || entry.op == "CREATE2")
+}
+
+/// Gets the lightest possible structlog for transaction `tx_hash`.
+pub(crate) async fn get_normalized_structlog( + provider: &ProviderT, + tx_hash: &B256, +) -> Result>, RpcError> +where + ProviderT: Provider, + TransportT: Transport + Clone, +{ + let inner = async { + // Optimization: It may be a better default to pull the stack immediately. + let stackonly_structlog_trace = provider + .debug_trace_transaction(*tx_hash, structlog_tracing_options(true, false, false)) + .await?; + + let stackonly_structlog_opt: Option> = + trace2structlog(stackonly_structlog_trace).unwrap_or_default(); + + let need_memory = stackonly_structlog_opt + .as_deref() + .is_some_and(trace_contains_create); + trace!("Need structlog with memory: {need_memory}"); + + if need_memory.not() { + return Ok(stackonly_structlog_opt); + }; + + let memory_structlog_fut = provider.debug_trace_transaction( + *tx_hash, + structlog_tracing_options(true, need_memory, false), + ); + + let memory_structlog = trace2structlog(memory_structlog_fut.await?).unwrap_or_default(); + + Ok::>, RpcError>(memory_structlog) + }; + + match timeout(TIMEOUT_LIMIT, inner).await { + Err(ellapsed_error) => Err(RpcError::Transport(TransportErrorKind::Custom(Box::new( + ellapsed_error, + )))), + Ok(structlog_res) => Ok(structlog_res?), + } +} + +/// Generate at JUMPDEST table by simulating the call stack in EVM, +/// using a Geth structlog as input. +pub(crate) fn generate_jumpdest_table( + tx: &Transaction, + struct_log: &[StructLog], + tx_traces: &BTreeMap, +) -> anyhow::Result<(JumpDestTableWitness, CodeDb)> { + trace!("Generating JUMPDEST table for tx: {}", tx.hash); + + let mut jumpdest_table = JumpDestTableWitness::default(); + let mut code_db = CodeDb::default(); + + // This map does neither contain the `init` field of Contract Deployment + // transactions nor CREATE, CREATE2 payloads. 
+ let callee_addr_to_code_hash: HashMap = tx_traces + .iter() + .filter_map(|(callee_addr, trace)| { + trace + .code_usage + .as_ref() + .map(|code| (*callee_addr, get_code_hash(code))) + }) + .collect(); + + // REVIEW: will be removed before merge + trace!( + "Transaction: {} is a {}.", + tx.hash, + if tx.to.is_some() { + "message call" + } else { + "contract creation" + } + ); + + let entrypoint_code_hash: H256 = match tx.to { + Some(to_address) if is_precompile(H160::from_str(&to_address.to_string())?) => { + return Ok((jumpdest_table, code_db)) + } + Some(to_address) if callee_addr_to_code_hash.contains_key(&to_address).not() => { + return Ok((jumpdest_table, code_db)) + } + Some(to_address) => callee_addr_to_code_hash[&to_address], + None => { + let init = &tx.input; + keccak(init) + } + }; + + // `None` encodes that previous `entry` was not a JUMP or JUMPI with true + // condition, `Some(jump_target)` encodes we came from a JUMP or JUMPI with + // true condition and target `jump_target`. + let mut prev_jump: Option = None; + + // The next available context. Starts at 1. Never decrements. + let mut next_ctx_available = 1; + // Immediately use context 1; + let mut call_stack = vec![(entrypoint_code_hash, next_ctx_available)]; + next_ctx_available += 1; + + for (step, entry) in struct_log.iter().enumerate() { + let op = entry.op.as_str(); + let curr_depth: usize = entry.depth.try_into().unwrap(); + + ensure!(curr_depth <= next_ctx_available, "Structlog is malformed."); + + while curr_depth < call_stack.len() { + call_stack.pop(); + } + + ensure!( + call_stack.is_empty().not(), + "Call stack was unexpectedly empty." 
+        );
+        let (code_hash, ctx) = call_stack.last().unwrap();
+
+        // REVIEW: will be removed before merge
+        trace!("TX:  {:?}", tx.hash);
+        trace!("STEP: {:?}", step);
+        trace!("STEPS: {:?}", struct_log.len());
+        trace!("OPCODE: {}", entry.op.as_str());
+        trace!("CODE: {:?}", code_hash);
+        trace!("CTX:  {:?}", ctx);
+        trace!("CURR_DEPTH: {:?}", curr_depth);
+        trace!("{:#?}\n", entry);
+
+        match op {
+            "CALL" | "CALLCODE" | "DELEGATECALL" | "STATICCALL" => {
+                prev_jump = None;
+                ensure!(entry.stack.as_ref().is_some(), "No evm stack found.");
+                // We reverse the stack, so the order matches our assembly code.
+                let evm_stack: Vec<_> = entry.stack.as_ref().unwrap().iter().rev().collect();
+                // These opcodes expect 6 or 7 operands on the stack, but for jumpdest-table
+                // generation we only use 2, and failures will be handled in
+                // next iteration by popping the stack accordingly.
+                let operands_used = 2;
+
+                if evm_stack.len() < operands_used {
+                    trace!( "Opcode {op} expected {operands_used} operands at the EVM stack, but only {} were found.", evm_stack.len());
+                    // Note for future debugging: There may exist edge cases, where the call
+                    // context has been incremented before the call op fails. This should be
+                    // accounted for before this and the following `continue`. The details are
+                    // defined in `sys_calls.asm`.
+                    continue;
+                }
+                // This is the same stack index (i.e. 2nd) for all four opcodes. See https://ethervm.io/#F1
+                let [_gas, address, ..] = evm_stack[..] else {
+                    unreachable!()
+                };
+
+                if *address > U256::from(U160::MAX) {
+                    trace!(
+                        "{op}: Callee address {} was larger than possible {}.",
+                        *address,
+                        U256::from(U160::MAX)
+                    );
+                    // See note above.
+ continue; + }; + let lower_20_bytes = U160::from(*address); + let callee_address = Address::from(lower_20_bytes); + + if callee_addr_to_code_hash.contains_key(&callee_address) { + let next_code_hash = callee_addr_to_code_hash[&callee_address]; + call_stack.push((next_code_hash, next_ctx_available)); + }; + + if is_precompile(H160::from_str(&callee_address.to_string())?) { + trace!("Called precompile at address {}.", &callee_address); + }; + + if callee_addr_to_code_hash.contains_key(&callee_address).not() + && is_precompile(H160::from_str(&callee_address.to_string())?).not() + { + // This case happens if calling an EOA. This is described + // under opcode `STOP`: https://www.evm.codes/#00?fork=cancun + trace!( + "Callee address {} has no associated `code_hash`.", + &callee_address + ); + } + next_ctx_available += 1; + } + "CREATE" | "CREATE2" => { + prev_jump = None; + ensure!(entry.stack.as_ref().is_some(), "No evm stack found."); + // We reverse the stack, so the order matches our assembly code. + let evm_stack: Vec<_> = entry.stack.as_ref().unwrap().iter().rev().collect(); + let operands_used = 3; + + if evm_stack.len() < operands_used { + trace!( "Opcode {op} expected {operands_used} operands at the EVM stack, but only {} were found.", evm_stack.len() ); + continue; + }; + + let [_value, offset, size, ..] = evm_stack[..] else { + unreachable!() + }; + if *offset > U256::from(usize::MAX) { + trace!( + "{op}: Offset {offset} was too large to fit in usize {}.", + usize::MAX + ); + continue; + }; + let offset: usize = offset.to(); + + if *size > U256::from(usize::MAX) { + trace!( + "{op}: Size {size} was too large to fit in usize {}.", + usize::MAX + ); + continue; + }; + let size: usize = size.to(); + + let memory_size = entry.memory.as_ref().unwrap().len() * WORDSIZE; + + if entry.memory.is_none() || offset + size > memory_size { + trace!("Insufficient memory available for {op}. 
Contract has size {size} and is supposed to be stored between offset {offset} and {}, but memory size is only {memory_size}.", offset+size); + continue; + } + let memory_raw: &[String] = entry.memory.as_ref().unwrap(); + let memory_parsed: Vec> = memory_raw + .iter() + .map(|mem_line| { + let mem_line_parsed = U256::from_str_radix(mem_line, 16)?; + Ok(mem_line_parsed.to_be_bytes()) + }) + .collect(); + let mem_res: anyhow::Result> = memory_parsed.into_iter().collect(); + if mem_res.is_err() { + trace!( + "{op}: Parsing memory failed with error: {}", + mem_res.unwrap_err() + ); + continue; + } + let memory: Vec = mem_res.unwrap().concat(); + + let init_code = &memory[offset..offset + size]; + code_db.insert(init_code.to_vec()); + let init_code_hash = keccak(init_code); + call_stack.push((init_code_hash, next_ctx_available)); + + next_ctx_available += 1; + } + "JUMP" => { + prev_jump = None; + ensure!(entry.stack.as_ref().is_some(), "No evm stack found."); + // We reverse the stack, so the order matches our assembly code. + let evm_stack: Vec<_> = entry.stack.as_ref().unwrap().iter().rev().collect(); + let operands = 1; + if evm_stack.len() < operands { + trace!( "Opcode {op} expected {operands} operands at the EVM stack, but only {} were found.", evm_stack.len() ); + continue; + } + let [jump_target, ..] = evm_stack[..] else { + unreachable!() + }; + + prev_jump = Some(*jump_target); + } + "JUMPI" => { + prev_jump = None; + ensure!(entry.stack.as_ref().is_some(), "No evm stack found."); + // We reverse the stack, so the order matches our assembly code. + let evm_stack: Vec<_> = entry.stack.as_ref().unwrap().iter().rev().collect(); + let operands = 2; + if evm_stack.len() < operands { + trace!( "Opcode {op} expected {operands} operands at the EVM stack, but only {} were found.", evm_stack.len()); + continue; + }; + + let [jump_target, condition, ..] = evm_stack[..] 
else { + unreachable!() + }; + let jump_condition = condition.is_zero().not(); + + if jump_condition { + prev_jump = Some(*jump_target) + } + } + "JUMPDEST" => { + let mut jumped_here = false; + + if let Some(jmp_target) = prev_jump { + jumped_here = jmp_target == U256::from(entry.pc); + } + prev_jump = None; + + if jumped_here.not() { + trace!( + "{op}: JUMPDESTs at offset {} was reached through fall-through.", + entry.pc + ); + continue; + } + + let jumpdest_offset = TryInto::::try_into(entry.pc); + if jumpdest_offset.is_err() { + trace!( + "{op}: Could not cast offset {} to usize {}.", + entry.pc, + usize::MAX + ); + continue; + } + assert!(jumpdest_offset.unwrap() < 24576); + jumpdest_table.insert(*code_hash, *ctx, jumpdest_offset.unwrap()); + } + "EXTCODECOPY" | "EXTCODESIZE" => { + prev_jump = None; + next_ctx_available += 1; + } + _ => { + prev_jump = None; + } + } + } + Ok((jumpdest_table, code_db)) +} + +fn trace2structlog(trace: GethTrace) -> Result>, serde_json::Error> { + match trace { + GethTrace::Default(it) => Ok(Some(it.struct_logs)), + GethTrace::JS(it) => Ok(Some(compat::deserialize(it)?.struct_logs)), + _ => Ok(None), + } +} +/// This module exists as a workaround for parsing `StructLog`. The `error` +/// field is a string in Geth and Alloy but an object in Erigon. A PR[^1] has +/// been merged to fix this upstream and should eventually render this +/// unnecessary. [^1]: `https://github.com/erigontech/erigon/pull/12089` +mod compat { + use std::{collections::BTreeMap, fmt, iter}; + + use alloy::rpc::types::trace::geth::{DefaultFrame, StructLog}; + use alloy_primitives::{Bytes, B256, U256}; + use serde::{de::SeqAccess, Deserialize, Deserializer}; + + pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result { + _DefaultFrame::deserialize(d) + } + + /// The `error` field is a `string` in `geth` etc. but an `object` in + /// `erigon`. 
+ fn error<'de, D: Deserializer<'de>>(d: D) -> Result, D::Error> { + #[derive(Deserialize)] + #[serde(untagged)] + enum Error { + String(String), + #[allow(dead_code)] + Object(serde_json::Map), + } + Ok(match Error::deserialize(d)? { + Error::String(it) => Some(it), + Error::Object(_) => None, + }) + } + + #[derive(Deserialize)] + #[serde(remote = "DefaultFrame", rename_all = "camelCase")] + struct _DefaultFrame { + failed: bool, + gas: u64, + return_value: Bytes, + #[serde(deserialize_with = "vec_structlog")] + struct_logs: Vec, + } + + fn vec_structlog<'de, D: Deserializer<'de>>(d: D) -> Result, D::Error> { + struct Visitor; + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = Vec; + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("an array of `StructLog`") + } + fn visit_seq>(self, mut seq: A) -> Result { + #[derive(Deserialize)] + struct With(#[serde(with = "_StructLog")] StructLog); + let v = iter::from_fn(|| seq.next_element().transpose()) + .map(|it| it.map(|With(it)| it)) + .collect::>()?; + Ok(v) + } + } + + d.deserialize_seq(Visitor) + } + + #[derive(Deserialize)] + #[serde(remote = "StructLog", rename_all = "camelCase")] + struct _StructLog { + pc: u64, + op: String, + gas: u64, + gas_cost: u64, + depth: u64, + #[serde(deserialize_with = "error")] + error: Option, + stack: Option>, + return_data: Option, + memory: Option>, + #[serde(rename = "memSize")] + memory_size: Option, + storage: Option>, + #[serde(rename = "refund")] + refund_counter: Option, + } +} diff --git a/zero/src/rpc/mod.rs b/zero/src/rpc/mod.rs index 007a4fdb2..4f4908eb0 100644 --- a/zero/src/rpc/mod.rs +++ b/zero/src/rpc/mod.rs @@ -24,6 +24,7 @@ use tracing::warn; use crate::prover::BlockProverInput; pub mod jerigon; +pub mod jumpdest; pub mod native; pub mod retry; @@ -40,12 +41,22 @@ pub enum RpcType { Native, } +/// The Jumpdest source type. 
+#[derive(ValueEnum, Clone, Debug, Copy)] +pub enum JumpdestSrc { + ProverSimulation, + ClientFetchedStructlogs, + ServerFetchedStructlogs, // later + Serverside, // later +} + /// Obtain the prover input for one block pub async fn block_prover_input( cached_provider: Arc>, block_id: BlockId, checkpoint_block_number: u64, rpc_type: RpcType, + jumpdest_src: JumpdestSrc, ) -> Result where ProviderT: Provider, @@ -53,10 +64,22 @@ where { match rpc_type { RpcType::Jerigon => { - jerigon::block_prover_input(cached_provider, block_id, checkpoint_block_number).await + jerigon::block_prover_input( + cached_provider, + block_id, + checkpoint_block_number, + jumpdest_src, + ) + .await } RpcType::Native => { - native::block_prover_input(cached_provider, block_id, checkpoint_block_number).await + native::block_prover_input( + cached_provider, + block_id, + checkpoint_block_number, + jumpdest_src, + ) + .await } } } diff --git a/zero/src/rpc/native/mod.rs b/zero/src/rpc/native/mod.rs index 5b4ed5dd9..76dd302ae 100644 --- a/zero/src/rpc/native/mod.rs +++ b/zero/src/rpc/native/mod.rs @@ -1,4 +1,3 @@ -use std::collections::BTreeSet; use std::ops::Deref; use std::sync::Arc; @@ -16,20 +15,23 @@ use crate::provider::CachedProvider; mod state; mod txn; -type CodeDb = BTreeSet>; +pub use txn::{process_transaction, process_transactions}; + +use super::JumpdestSrc; /// Fetches the prover input for the given BlockId. pub async fn block_prover_input( provider: Arc>, block_number: BlockId, checkpoint_block_number: u64, + jumpdest_src: JumpdestSrc, ) -> anyhow::Result where ProviderT: Provider, TransportT: Transport + Clone, { let (block_trace, other_data) = try_join!( - process_block_trace(provider.clone(), block_number), + process_block_trace(provider.clone(), block_number, jumpdest_src), crate::rpc::fetch_other_block_data(provider.clone(), block_number, checkpoint_block_number) )?; @@ -40,9 +42,10 @@ where } /// Processes the block with the given block number and returns the block trace. 
-async fn process_block_trace( +pub(crate) async fn process_block_trace( cached_provider: Arc>, block_number: BlockId, + jumpdest_src: JumpdestSrc, ) -> anyhow::Result where ProviderT: Provider, @@ -52,8 +55,12 @@ where .get_block(block_number, BlockTransactionsKind::Full) .await?; - let (code_db, txn_info) = - txn::process_transactions(&block, cached_provider.get_provider().await?.deref()).await?; + let (code_db, txn_info) = txn::process_transactions( + &block, + cached_provider.get_provider().await?.deref(), + jumpdest_src, + ) + .await?; let trie_pre_images = state::process_state_witness(cached_provider, block, &txn_info).await?; Ok(BlockTrace { diff --git a/zero/src/rpc/native/txn.rs b/zero/src/rpc/native/txn.rs index 79de06d79..96d30f554 100644 --- a/zero/src/rpc/native/txn.rs +++ b/zero/src/rpc/native/txn.rs @@ -1,3 +1,4 @@ +use core::option::Option::None; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use __compat_primitive_types::{H256, U256}; @@ -9,27 +10,32 @@ use alloy::{ Provider, }, rpc::types::{ - eth::Transaction, - eth::{AccessList, Block}, + eth::{AccessList, Block, Transaction}, trace::geth::{ - AccountState, DiffMode, GethDebugBuiltInTracerType, GethTrace, PreStateConfig, - PreStateFrame, PreStateMode, + AccountState, DiffMode, GethDebugBuiltInTracerType, GethDebugTracerType, + GethDebugTracingOptions, GethTrace, PreStateConfig, PreStateFrame, PreStateMode, + StructLog, }, - trace::geth::{GethDebugTracerType, GethDebugTracingOptions}, }, transports::Transport, }; -use anyhow::Context as _; -use compat::Compat; +use anyhow::{Context as _, Ok}; +use evm_arithmetization::{jumpdest::JumpDestTableWitness, CodeDb}; use futures::stream::{FuturesOrdered, TryStreamExt}; use trace_decoder::{ContractCodeUsage, TxnInfo, TxnMeta, TxnTrace}; +use tracing::info; -use super::CodeDb; +use crate::rpc::Compat; +use crate::rpc::{ + jumpdest::{self, get_normalized_structlog}, + JumpdestSrc, +}; /// Processes the transactions in the given block and 
updates the code db. -pub(super) async fn process_transactions( +pub async fn process_transactions( block: &Block, provider: &ProviderT, + jumpdest_src: JumpdestSrc, ) -> anyhow::Result<(CodeDb, Vec)> where ProviderT: Provider, @@ -40,7 +46,7 @@ where .as_transactions() .context("No transactions in block")? .iter() - .map(|tx| process_transaction(provider, tx)) + .map(|tx| process_transaction(provider, tx, jumpdest_src)) .collect::>() .try_fold( (BTreeSet::new(), Vec::new()), @@ -55,37 +61,67 @@ where /// Processes the transaction with the given transaction hash and updates the /// accounts state. -async fn process_transaction( +pub async fn process_transaction( provider: &ProviderT, tx: &Transaction, + jumpdest_src: JumpdestSrc, ) -> anyhow::Result<(CodeDb, TxnInfo)> where ProviderT: Provider, TransportT: Transport + Clone, { - let (tx_receipt, pre_trace, diff_trace) = fetch_tx_data(provider, &tx.hash).await?; + let (tx_receipt, pre_trace, diff_trace, structlog_opt) = + fetch_tx_data(provider, &tx.hash, jumpdest_src).await?; let tx_status = tx_receipt.status(); let tx_receipt = tx_receipt.map_inner(rlp::map_receipt_envelope); let access_list = parse_access_list(tx.access_list.as_ref()); - let tx_meta = TxnMeta { - byte_code: ::TxEnvelope::try_from(tx.clone())?.encoded_2718(), - new_receipt_trie_node_byte: alloy::rlp::encode(tx_receipt.inner), - gas_used: tx_receipt.gas_used as u64, - }; - - let (code_db, mut tx_traces) = match (pre_trace, diff_trace) { + let (mut code_db, mut tx_traces) = match (pre_trace, diff_trace) { ( GethTrace::PreStateTracer(PreStateFrame::Default(read)), GethTrace::PreStateTracer(PreStateFrame::Diff(diff)), - ) => process_tx_traces(access_list, read, diff).await?, + ) => { + info!("{:?} {:?} {:?}", tx.hash, read, diff); + process_tx_traces(access_list, read, diff).await? 
+ } _ => unreachable!(), }; // Handle case when transaction failed and a contract creation was reverted if !tx_status && tx_receipt.contract_address.is_some() { tx_traces.insert(tx_receipt.contract_address.unwrap(), TxnTrace::default()); - } + }; + + let jc: Option<(JumpDestTableWitness, CodeDb)> = structlog_opt.and_then(|struct_logs| { + jumpdest::generate_jumpdest_table(tx, &struct_logs, &tx_traces).map_or_else( + |error| { + info!( + "{:#?}: JumpDestTable generation failed with reason: {}", + tx.hash, error + ); + None + }, + |(jdt, code_db)| { + info!( + "{:#?}: JumpDestTable generation succeeded with result: {}", + tx.hash, jdt + ); + Some((jdt, code_db)) + }, + ) + }); + + let jumpdest_table = jc.map(|(j, c)| { + code_db.extend(c); + j + }); + + let tx_meta = TxnMeta { + byte_code: ::TxEnvelope::try_from(tx.clone())?.encoded_2718(), + new_receipt_trie_node_byte: alloy::rlp::encode(tx_receipt.inner), + gas_used: tx_receipt.gas_used as u64, + jumpdest_table, + }; Ok(( code_db, @@ -103,7 +139,13 @@ where async fn fetch_tx_data( provider: &ProviderT, tx_hash: &B256, -) -> anyhow::Result<(::ReceiptResponse, GethTrace, GethTrace), anyhow::Error> + jumpdest_src: JumpdestSrc, +) -> anyhow::Result<( + ::ReceiptResponse, + GethTrace, + GethTrace, + Option>, +)> where ProviderT: Provider, TransportT: Transport + Clone, @@ -112,13 +154,29 @@ where let pre_trace_fut = provider.debug_trace_transaction(*tx_hash, prestate_tracing_options(false)); let diff_trace_fut = provider.debug_trace_transaction(*tx_hash, prestate_tracing_options(true)); - let (tx_receipt, pre_trace, diff_trace) = - futures::try_join!(tx_receipt_fut, pre_trace_fut, diff_trace_fut,)?; + let (tx_receipt, pre_trace, diff_trace, structlog_trace) = match jumpdest_src { + JumpdestSrc::ClientFetchedStructlogs => { + let structlog_trace_fut = get_normalized_structlog(provider, tx_hash); + futures::try_join!( + tx_receipt_fut, + pre_trace_fut, + diff_trace_fut, + structlog_trace_fut, + )? 
+ } + JumpdestSrc::ProverSimulation => { + let (tx_receipt, pre_trace, diff_trace) = + futures::try_join!(tx_receipt_fut, pre_trace_fut, diff_trace_fut,)?; + (tx_receipt, pre_trace, diff_trace, None) + } + _ => todo!(), + }; Ok(( tx_receipt.context("Transaction receipt not found.")?, pre_trace, diff_trace, + structlog_trace, )) }