From 96e0acdd2981353a14a131545e00183a558c12bf Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 30 Sep 2024 14:44:23 +0100 Subject: [PATCH 1/6] Removing asm path. --- Cargo.toml | 6 +- asm-utils/src/ast.rs | 220 ----------- asm-utils/src/data_parser.rs | 277 ------------- asm-utils/src/data_storage.rs | 157 -------- asm-utils/src/lib.rs | 20 - asm-utils/src/parser.rs | 17 - asm-utils/src/reachability.rs | 305 --------------- asm-utils/src/utils.rs | 66 ---- cli-rs/src/main.rs | 107 +---- {asm-utils => isa-utils}/Cargo.toml | 10 +- isa-utils/src/lib.rs | 21 + pipeline/Cargo.toml | 1 - riscv/Cargo.toml | 2 +- riscv/benches/executor_benchmark.rs | 19 +- riscv/src/asm/disambiguator.rs | 155 -------- riscv/src/asm/mod.rs | 581 ---------------------------- riscv/src/asm/parser.rs | 35 -- riscv/src/asm/riscv_asm.lalrpop | 200 ---------- riscv/src/code_gen.rs | 22 +- riscv/src/elf/mod.rs | 2 +- riscv/src/lib.rs | 290 ++++---------- riscv/tests/instructions.rs | 22 +- riscv/tests/riscv.rs | 105 ++--- 23 files changed, 158 insertions(+), 2482 deletions(-) delete mode 100644 asm-utils/src/ast.rs delete mode 100644 asm-utils/src/data_parser.rs delete mode 100644 asm-utils/src/data_storage.rs delete mode 100644 asm-utils/src/lib.rs delete mode 100644 asm-utils/src/parser.rs delete mode 100644 asm-utils/src/reachability.rs delete mode 100644 asm-utils/src/utils.rs rename {asm-utils => isa-utils}/Cargo.toml (50%) create mode 100644 isa-utils/src/lib.rs delete mode 100644 riscv/src/asm/disambiguator.rs delete mode 100644 riscv/src/asm/mod.rs delete mode 100644 riscv/src/asm/parser.rs delete mode 100644 riscv/src/asm/riscv_asm.lalrpop diff --git a/Cargo.toml b/Cargo.toml index d76db81f3e..75bac00111 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ members = [ "ast", "analysis", "linker", - "asm-utils", + "isa-utils", "airgen", "riscv-executor", "riscv-syscalls", @@ -44,7 +44,7 @@ powdr = { path = "./powdr", version = "0.1.0-alpha.2" } powdr-airgen = { path = 
"./airgen", version = "0.1.0-alpha.2" } powdr-ast = { path = "./ast", version = "0.1.0-alpha.2" } powdr-asm-to-pil = { path = "./asm-to-pil", version = "0.1.0-alpha.2" } -powdr-asm-utils = { path = "./asm-utils", version = "0.1.0-alpha.2" } +powdr-isa-utils = { path = "./isa-utils", version = "0.1.0-alpha.2" } powdr-analysis = { path = "./analysis", version = "0.1.0-alpha.2" } powdr-backend = { path = "./backend", version = "0.1.0-alpha.2" } powdr-executor = { path = "./executor", version = "0.1.0-alpha.2" } @@ -74,4 +74,4 @@ codegen-units = 256 [profile.release-with-debug] inherits = "release" -debug = true \ No newline at end of file +debug = true diff --git a/asm-utils/src/ast.rs b/asm-utils/src/ast.rs deleted file mode 100644 index 57c0ed2813..0000000000 --- a/asm-utils/src/ast.rs +++ /dev/null @@ -1,220 +0,0 @@ -//! Common AST for the frontend architecture inputs. - -use std::fmt::{self, Debug, Display}; - -use itertools::Itertools; - -#[derive(Clone, Debug)] -pub enum Statement { - Label(String), - Directive(String, Vec>), - Instruction(String, Vec>), -} - -#[derive(Clone, Debug)] -pub enum Argument { - Register(R), - RegOffset(Option>, R), - StringLiteral(Vec), - Expression(Expression), -} - -impl Argument { - pub fn post_visit_expressions_mut(&mut self, f: &mut impl FnMut(&mut Expression)) { - match self { - Argument::Register(_) | Argument::StringLiteral(_) | Argument::RegOffset(None, _) => (), - Argument::RegOffset(Some(expr), _) | Argument::Expression(expr) => { - expr.post_visit_mut(f); - } - } - } - - pub fn post_visit_expressions<'a>(&'a self, f: &mut impl FnMut(&'a Expression)) { - match self { - Argument::Register(_) | Argument::StringLiteral(_) | Argument::RegOffset(None, _) => (), - Argument::RegOffset(Some(expr), _) | Argument::Expression(expr) => { - expr.post_visit(f); - } - } - } -} - -pub trait Register: Display + Debug {} - -#[derive(Clone, Copy, Debug)] -pub enum UnaryOpKind { - Negation, - BitwiseNot, -} - -impl Display for UnaryOpKind { - 
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - UnaryOpKind::Negation => write!(f, "-"), - UnaryOpKind::BitwiseNot => write!(f, "~"), - } - } -} - -#[derive(Clone, Copy, Debug)] -pub enum BinaryOpKind { - Or, - Xor, - And, - LeftShift, - RightShift, - Add, - Sub, - Mul, - Div, - Mod, -} - -pub trait FunctionOpKind: Display + Debug {} - -#[derive(Clone, Debug)] -pub enum Expression { - Number(i64), - Symbol(String), - UnaryOp(UnaryOpKind, Box>), - BinaryOp(BinaryOpKind, Box<[Expression; 2]>), - FunctionOp(F, Box>), -} - -impl Expression { - fn post_visit<'a>(&'a self, f: &mut impl FnMut(&'a Expression)) { - match self { - Expression::Number(_) => {} - Expression::Symbol(_) => {} - Expression::UnaryOp(_, subexpr) => { - Self::post_visit(subexpr, f); - } - Expression::BinaryOp(_, subexprs) => { - subexprs.iter().for_each(|subexpr| { - Self::post_visit(subexpr, f); - }); - } - Expression::FunctionOp(_, subexpr) => { - Self::post_visit(subexpr, f); - } - } - f(self); - } - - fn post_visit_mut(&mut self, f: &mut impl FnMut(&mut Expression)) { - match self { - Expression::Number(_) => {} - Expression::Symbol(_) => {} - Expression::UnaryOp(_, subexpr) => { - Self::post_visit_mut(subexpr, f); - } - Expression::BinaryOp(_, subexprs) => { - subexprs.iter_mut().for_each(|subexpr| { - Self::post_visit_mut(subexpr, f); - }); - } - Expression::FunctionOp(_, subexpr) => { - Self::post_visit_mut(subexpr, f); - } - } - f(self); - } -} - -pub fn new_unary_op(op: UnaryOpKind, v: Expression) -> Expression { - Expression::UnaryOp(op, Box::new(v)) -} - -pub fn new_binary_op( - op: BinaryOpKind, - l: Expression, - r: Expression, -) -> Expression { - Expression::BinaryOp(op, Box::new([l, r])) -} - -pub fn new_function_op(op: F, v: Expression) -> Expression { - Expression::FunctionOp(op, Box::new(v)) -} - -impl Display for Statement { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Statement::Label(l) => writeln!(f, "{l}:"), - 
Statement::Directive(d, args) => writeln!(f, " {d} {}", args.iter().format(", ")), - Statement::Instruction(i, args) => writeln!(f, " {i} {}", args.iter().format(", ")), - } - } -} - -impl Display for Argument { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Argument::Register(r) => write!(f, "{r}"), - Argument::RegOffset(None, reg) => write!(f, "({reg})"), - Argument::RegOffset(Some(off), reg) => write!(f, "{off}({reg})"), - Argument::StringLiteral(lit) => write!(f, "\"{}\"", String::from_utf8_lossy(lit)), - Argument::Expression(expr) => write!(f, "{expr}"), - } - } -} - -impl Display for Expression { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Expression::Number(n) => write!(f, "{n}"), - Expression::Symbol(sym) => write!(f, "{sym}"), - Expression::UnaryOp(kind, expr) => write!(f, "({kind}{expr})"), - Expression::BinaryOp(op, args) => { - let symbol = match op { - BinaryOpKind::Or => "|", - BinaryOpKind::Xor => "^", - BinaryOpKind::And => "&", - BinaryOpKind::LeftShift => "<<", - BinaryOpKind::RightShift => ">>", - BinaryOpKind::Add => "+", - BinaryOpKind::Sub => "-", - BinaryOpKind::Mul => "*", - BinaryOpKind::Div => "/", - BinaryOpKind::Mod => "%", - }; - write!(f, "({} {symbol} {})", args[0], args[1]) - } - Expression::FunctionOp(kind, expr) => write!(f, "{kind}({expr})"), - } - } -} - -/// Parse an escaped string - used in the grammar. -pub fn unescape_string(s: &str) -> Vec { - assert!(s.len() >= 2); - assert!(s.starts_with('"') && s.ends_with('"')); - let mut chars = s[1..s.len() - 1].chars(); - let mut result = vec![]; - while let Some(c) = chars.next() { - result.push(if c == '\\' { - let next = chars.next().unwrap(); - if next.is_ascii_digit() { - // octal number. 
- let n = next as u8 - b'0'; - let nn = chars.next().unwrap() as u8 - b'0'; - let nnn = chars.next().unwrap() as u8 - b'0'; - nnn + nn * 8 + n * 64 - } else if next == 'x' { - todo!("Parse hex digit"); - } else { - (match next { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'b' => 8 as char, - 'f' => 12 as char, - other => other, - }) as u8 - } - } else { - c as u8 - }) - } - result -} diff --git a/asm-utils/src/data_parser.rs b/asm-utils/src/data_parser.rs deleted file mode 100644 index 03bd6ebf04..0000000000 --- a/asm-utils/src/data_parser.rs +++ /dev/null @@ -1,277 +0,0 @@ -use std::collections::BTreeMap; - -use crate::{ - ast::{Argument, BinaryOpKind, Expression, FunctionOpKind, Register, Statement}, - utils::{alignment_size, split_at_first}, -}; - -#[derive(Debug)] -pub enum DataValue { - Direct(Vec), - Zero(usize), - // alignment size and the byte value used as padding - Alignment(usize, u8), - Reference(String), - // This is needed for .word directives such as - // .word .Lfunc_begin0-.Lfunc_begin0 - Offset(String, String), -} - -impl DataValue { - /// Returns the size of the value in bytes. - /// - /// The address is necessary because the size of the alignment padding - /// depends on what address it is defined on. - pub fn size(&self, from_addr: usize) -> usize { - match self { - DataValue::Direct(data) => data.len(), - DataValue::Zero(length) => *length, - DataValue::Alignment(bytes, _) => alignment_size(from_addr, *bytes), - DataValue::Reference(_) => 4, - DataValue::Offset(..) => 4, - } - } -} - -#[derive(Default)] -struct DataSections { - /// This is a vector of sections, where each section is a vector of (maybe - /// named) labels, which in turn contains a sequence of data values. - /// - /// I weighted against making this and a potential `struct Section` part of - /// the public API because the users would need to know and access all the - /// internals anyway, so it wouldn't be abstracting away any complexity. 
- sections: Vec, Vec)>>, -} - -impl DataSections { - fn new() -> Self { - Default::default() - } - - fn current_entry(&mut self) -> &mut Vec { - let last_section = self.sections.last_mut().unwrap(); - if last_section.is_empty() { - last_section.push((None, Vec::new())) - } - &mut last_section.last_mut().unwrap().1 - } - - fn append_label_to_curr_section(&mut self, label: &str) { - let last_section = self.sections.last_mut().unwrap(); - last_section.push((Some(label.to_owned()), Vec::new())); - } - - fn append_section(&mut self) { - self.sections.push(Vec::new()) - } - - fn add_empty_section(&mut self, label: String) { - self.sections.push(vec![(Some(label), Vec::new())]); - - // If there are other sections, the previous one is the active one, so we swap. - let len = self.sections.len(); - if len > 1 { - self.sections.swap(len - 1, len - 2); - } - } -} - -pub struct DataObjects { - pub sections: Vec, Vec)>>, - pub adhoc_symbols: BTreeMap, -} - -/// Extract all data objects from the list of statements. -/// Returns the named data objects themselves and a vector of the names -/// in the order in which they occur in the statements. 
-pub fn extract_data_objects( - statements: &[Statement], -) -> DataObjects { - let mut adhoc_symbols = BTreeMap::new(); - let mut data = DataSections::new(); - - let mut is_in_data_section = false; - - for s in statements { - match s { - Statement::Label(l) => { - if is_in_data_section { - data.append_label_to_curr_section(l); - } - } - Statement::Directive(dir, args) => match (dir.as_str(), &args[..]) { - (".text", args) => { - assert!(args.is_empty()); - is_in_data_section = false; - } - (".data", args) => { - assert!(args.is_empty()); - is_in_data_section = true; - data.append_section(); - } - (".section", args) => { - is_in_data_section = is_data_section(&args[0]); - if is_in_data_section { - data.append_section(); - } - } - ( - ".zero" | ".ascii" | ".asciz" | ".dword" | ".word" | ".half" | ".hword" - | ".short" | ".byte", - args, - ) => { - if is_in_data_section { - data.current_entry() - .extend(extract_data_value(dir.as_str(), args)); - } else { - // This is most likely debug data. 
- } - } - (".balign", [Argument::Expression(Expression::Number(byte_size))]) => { - if is_in_data_section { - data.current_entry() - .push(DataValue::Alignment(*byte_size as usize, 0)); - } - } - ( - ".balign", - [Argument::Expression(Expression::Number(byte_size)), Argument::Expression(Expression::Number(pad_value))], - ) => { - if is_in_data_section { - data.current_entry() - .push(DataValue::Alignment(*byte_size as usize, *pad_value as u8)); - } - } - (".p2align", [Argument::Expression(Expression::Number(pow_of_2))]) => { - if is_in_data_section { - data.current_entry() - .push(DataValue::Alignment((1 << pow_of_2) as usize, 0)); - } - } - ( - ".p2align", - [Argument::Expression(Expression::Number(pow_of_2)), Argument::Expression(Expression::Number(pad_value))], - ) => { - if is_in_data_section { - data.current_entry().push(DataValue::Alignment( - (1 << pow_of_2) as usize, - *pad_value as u8, - )); - } - } - ( - ".set", - [Argument::Expression(Expression::Symbol(label)), Argument::Expression(Expression::Number(value))], - ) => { - // This is a directive that sets a symbol to a value. We - // create a phantom empty data section so reachability is - // happy, but we also save it so we can replace the symbol - // with the value when needed. - data.add_empty_section(label.clone()); - adhoc_symbols.insert(label.clone(), *value as u32); - } - - (n @ ".balign" | n @ ".p2align", arg) => { - // TODO: implement last optional argument of .balign and .p2align - unimplemented!("{n} {arg:?}"); - } - _ => {} - }, - _ => {} - } - } - DataObjects { - sections: data.sections, - adhoc_symbols, - } -} - -fn is_data_section(arg: &Argument) -> bool { - let full_name = match arg { - Argument::StringLiteral(name) => name.as_slice(), - Argument::Expression(Expression::Symbol(name)) => name.as_bytes(), - _ => return false, - }; - - // split out the part before the initial '.' - let name = split_at_first(full_name, &b'.').1.unwrap(); - - // isolate name until next '.' 
- let name = split_at_first(name, &b'.').0; - - matches!( - name, - b"sbss" | b"tbss" | b"bss" | b"sdata" | b"tdata" | b"rodata" | b"data" | b"data1" - ) -} - -fn extract_data_value( - directive: &str, - arguments: &[Argument], -) -> Vec { - match (directive, arguments) { - (".zero", [Argument::Expression(Expression::Number(n))]) => { - vec![DataValue::Zero(*n as usize)] - } - ( - ".zero", - [Argument::Expression(Expression::Number(n)), Argument::Expression(Expression::Number(value))], - ) => { - assert!(0 <= *value && *value <= 0xff); - vec![DataValue::Direct(vec![*value as u8; *n as usize])] - } - (".ascii", [Argument::StringLiteral(data)]) => { - vec![DataValue::Direct(data.clone())] - } - (".asciz", [Argument::StringLiteral(data)]) => { - let mut data = data.clone(); - data.push(0); - vec![DataValue::Direct(data)] - } - (".dword" | ".half" | ".hword" | ".short" | ".byte", data) => { - let len = match directive { - ".dword" => 8, - ".byte" => 1, - _ => 2, - }; - - let mut bytes = Vec::with_capacity(data.len() * len); - for arg in data { - let Argument::Expression(Expression::Number(n)) = arg else { - panic!("only literals are supported for .{directive}"); - }; - for byte in 0..len { - bytes.push((n >> (byte * 8) & 0xff) as u8); - } - } - - vec![DataValue::Direct(bytes)] - } - (".word", data) => data - .iter() - .map(|x| match x { - Argument::Expression(Expression::Number(n)) => { - let n = *n as u32; - DataValue::Direct(vec![ - (n & 0xff) as u8, - (n >> 8 & 0xff) as u8, - (n >> 16 & 0xff) as u8, - (n >> 24 & 0xff) as u8, - ]) - } - Argument::Expression(Expression::Symbol(sym)) => DataValue::Reference(sym.clone()), - Argument::Expression(Expression::BinaryOp(BinaryOpKind::Sub, args)) => { - match args.as_slice() { - [Expression::Symbol(a), Expression::Symbol(b)] => { - DataValue::Offset(a.to_string(), b.to_string()) - } - _ => panic!("Invalid .word directive"), - } - } - _ => panic!("Invalid .word directive"), - }) - .collect::>(), - _ => panic!(), - } -} diff 
--git a/asm-utils/src/data_storage.rs b/asm-utils/src/data_storage.rs deleted file mode 100644 index 4563a08c96..0000000000 --- a/asm-utils/src/data_storage.rs +++ /dev/null @@ -1,157 +0,0 @@ -//! Utilities for generating initialization code that stores data objects in memory. - -use std::collections::BTreeMap; - -use crate::{ - data_parser::DataValue, - utils::{alignment_size, next_aligned}, -}; - -/// A single 32-bit data value. -pub enum SingleDataValue { - /// A literal value. - Value(u32), - /// The value of a pointer to a text label. Since there is no 1-to-1 - /// correspondence between RISC-V and Powdr ASM instructions, this is - /// passed unresolved to the code generator. - LabelReference(String), - /// Currently not supported. - Offset(String, String), -} - -struct WordWriter<'a> { - data_writer: &'a mut dyn FnMut(Option, u32, SingleDataValue), - partial: u32, - current_pos: u32, - latest_label: Option, -} - -impl<'a> WordWriter<'a> { - fn new( - starting_pos: u32, - data_writer: &'a mut dyn FnMut(Option, u32, SingleDataValue), - ) -> Self { - // sanitary alignment to 8 bytes - let current_pos = next_aligned(starting_pos as usize, 8) as u32; - Self { - partial: 0, - current_pos, - data_writer, - latest_label: None, - } - } - - fn current_position(&self) -> u32 { - self.current_pos - } - - fn set_label(&mut self, label: String) { - self.latest_label = Some(label) - } - - fn advance(&mut self, bytes: u32) { - let next_pos = self.current_pos + bytes; - - // if changed words, flush - let curr_word = self.current_pos & (!0b11); - if (next_pos & (!0b11) != curr_word) && (self.partial != 0) { - (*self.data_writer)( - std::mem::take(&mut self.latest_label), - curr_word, - SingleDataValue::Value(self.partial), - ); - self.partial = 0; - } - self.current_pos = next_pos; - } - - fn align(&mut self, alignment: u32, pad_value: u8) { - let padding_size = alignment_size(self.current_pos as usize, alignment as usize); - if padding_size != 0 { - if pad_value == 0 { - 
self.advance(padding_size as u32); - } else { - self.write_bytes(std::iter::repeat(pad_value).take(padding_size)); - } - } - } - - fn write_bytes>(&mut self, bytes: I) { - for b in bytes { - self.partial |= (b as u32) << (8 * (self.current_pos % 4)); - self.advance(1); - } - } - - fn write_label_reference(&mut self, label: String) { - assert_eq!( - self.current_pos % 4, - 0, - "reference to code labels in misaligned data section is not supported" - ); - - (*self.data_writer)( - std::mem::take(&mut self.latest_label), - self.current_pos, - SingleDataValue::LabelReference(label), - ); - - assert_eq!(self.partial, 0); - self.current_pos += 4; - } - - fn finish(mut self) { - // ensure the latest partial word is written - self.advance(4); - } -} - -pub fn store_data_objects( - sections: Vec, Vec)>>, - memory_start: u32, - code_gen: &mut dyn FnMut(Option, u32, SingleDataValue), - positions: &mut BTreeMap, -) { - let mut writer = WordWriter::new(memory_start, code_gen); - - let mut current_pos = writer.current_position(); - for (name, data) in sections.iter().flatten() { - if let Some(name) = name { - positions.insert(name.clone(), current_pos); - } - for d in data.iter() { - current_pos += d.size(current_pos as usize) as u32; - } - } - - for (name, data) in sections.into_iter().flatten() { - if let Some(name) = name { - writer.set_label(name); - } - for item in data { - match item { - DataValue::Zero(length) => { - // We can assume memory to be zero-initialized, so we - // just have to advance. 
- writer.advance(length as u32); - } - DataValue::Direct(bytes) => { - writer.write_bytes(bytes.iter().copied()); - } - DataValue::Reference(sym) => { - if let Some(p) = positions.get(&sym) { - writer.write_bytes(p.to_le_bytes().iter().copied()); - } else { - // code reference - writer.write_label_reference(sym); - } - } - DataValue::Alignment(bytes, pad_value) => { - writer.align(bytes as u32, pad_value); - } - DataValue::Offset(_l, _r) => unimplemented!(), - } - } - } - writer.finish(); -} diff --git a/asm-utils/src/lib.rs b/asm-utils/src/lib.rs deleted file mode 100644 index 3685c7d43b..0000000000 --- a/asm-utils/src/lib.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Common crate for generalized assembly handling. - -#![deny(clippy::print_stdout)] - -use ast::{Argument, FunctionOpKind, Register}; - -pub mod ast; -pub mod data_parser; -pub mod data_storage; -pub mod parser; -pub mod reachability; -pub mod utils; - -pub trait Architecture { - fn instruction_ends_control_flow(instr: &str) -> bool; - fn get_references<'a, R: Register, F: FunctionOpKind>( - instr: &str, - args: &'a [Argument], - ) -> Vec<&'a str>; -} diff --git a/asm-utils/src/parser.rs b/asm-utils/src/parser.rs deleted file mode 100644 index 3f0f29dcda..0000000000 --- a/asm-utils/src/parser.rs +++ /dev/null @@ -1,17 +0,0 @@ -use crate::ast::{FunctionOpKind, Register, Statement}; - -pub fn parse_asm>( - parser: P, - input: &str, -) -> Vec> { - input - .split('\n') - .map(|l| l.trim()) - .filter(|l| !l.is_empty()) - .flat_map(|line| parser.parse(line).unwrap()) - .collect() -} - -pub trait Parser { - fn parse(&self, input: &str) -> Result>, String>; -} diff --git a/asm-utils/src/reachability.rs b/asm-utils/src/reachability.rs deleted file mode 100644 index d331728922..0000000000 --- a/asm-utils/src/reachability.rs +++ /dev/null @@ -1,305 +0,0 @@ -use std::collections::{BTreeMap, BTreeSet, HashSet}; - -use itertools::Itertools; - -use crate::data_parser::DataValue; -use crate::Architecture; - -use 
crate::ast::{Argument, Expression, FunctionOpKind, Register, Statement}; - -/// Processes the statements and removes all statements and objects that are -/// not reachable from the label `label`. -/// Keeps the order of the statements. -pub fn filter_reachable_from<'a, R: Register, F: FunctionOpKind, A: Architecture>( - label: &str, - statements: &mut Vec>, - data_sections: &'a mut Vec, Vec)>>, -) -> HashSet<&'a str> { - let replacements = extract_replacements(statements); - let replacement_refs = replacements - .iter() - .map(|(k, v)| (k.as_str(), v.as_str())) - .collect(); - let (referenced_labels, referenced_data_sections) = - find_reachable_labels::(label, statements, data_sections, &replacement_refs); - - { - let mut iter_idx = 0usize; - data_sections.retain(|_| { - let must_retain = referenced_data_sections.contains(&iter_idx); - iter_idx += 1; - must_retain - }); - } - - let mut remaining_data_labels = HashSet::new(); - for (name, value) in data_sections.iter_mut().flatten() { - if let Some(label) = name { - remaining_data_labels.insert(label.as_str()); - } - - apply_replacement_to_object(value, &replacement_refs) - } - - let mut active = false; - *statements = std::mem::take(statements) - .into_iter() - .filter_map(|mut s| { - let include = if active { - if ends_control_flow::(&s) { - active = false; - } - true - } else { - if let Statement::Label(l) = &s { - active = referenced_labels.contains(l) - && !remaining_data_labels.contains(l.as_str()); - } - active - }; - - if include { - apply_replacement_to_instruction(&mut s, &replacement_refs); - Some(s) - } else { - None - } - }) - .collect(); - - remaining_data_labels -} - -#[allow(clippy::print_stderr)] -pub fn find_reachable_labels<'a, R: Register, F: FunctionOpKind, A: Architecture>( - label: &'a str, - statements: &'a [Statement], - data_sections: &'a [Vec<(Option, Vec)>], - replacements: &BTreeMap<&str, &'a str>, -) -> (HashSet, HashSet) { - // Maps each data label to the section they belong to - let 
all_data_labels: BTreeMap<&str, usize> = data_sections - .iter() - .enumerate() - .flat_map(|(section_idx, entries)| { - entries - .iter() - .filter_map(move |(name, _)| name.as_ref().map(|name| (name.as_str(), section_idx))) - }) - .collect(); - - let label_offsets = extract_label_offsets(statements); - let mut queued_labels = BTreeSet::from([label]); - let mut processed_labels = HashSet::new(); - let mut reached_data_sections = HashSet::new(); - while let Some(l) = queued_labels.pop_first() { - let l = *replacements.get(l).unwrap_or(&l); - if !processed_labels.insert(l.to_owned()) { - continue; - } - - let new_references = if let Some(section_idx) = all_data_labels.get(l) { - reached_data_sections.insert(*section_idx); - let section = &data_sections[*section_idx]; - section - .iter() - .flat_map(|(_, values)| values.iter()) - .filter_map(|v| { - if let DataValue::Reference(sym) = v { - Some(sym.as_str()) - } else { - None - } - }) - .collect() - } else if let Some(offset) = label_offsets.get(l) { - let (referenced_labels_in_block, seen_labels_in_block) = - basic_block_references_starting_from::(&statements[*offset..]); - processed_labels.extend(seen_labels_in_block.into_iter().map(|s| s.to_string())); - referenced_labels_in_block - } else { - eprintln!( - "The assembly code references an external routine / label that is not available:" - ); - eprintln!("{l}"); - panic!(); - }; - for referenced in new_references { - if !processed_labels.contains(referenced) { - queued_labels.insert(referenced); - } - } - } - - (processed_labels, reached_data_sections) -} - -fn extract_replacements( - statements: &[Statement], -) -> BTreeMap { - let mut replacements = statements - .iter() - .filter_map(|s| match s { - Statement::Directive(dir, args) if dir.as_str() == ".set" => { - match &args[..] 
{ - [Argument::Expression(Expression::Symbol(from)), Argument::Expression(Expression::Symbol(to))] => - { - Some((from.to_string(), to.to_string())) - }, - [Argument::Expression(Expression::Symbol(_)), Argument::Expression(Expression::Number(_))] => { - // Not a replacement, but not an error either, so ignore. - None - } - _ =>{ - panic!(); - } - } - }, - _ => None, - }) - .fold(BTreeMap::new(), |mut acc, (from, to)| { - if acc.insert(from.to_string(), to).is_some() { - panic!("Duplicate .set directive: {from}") - } - acc - }); - - // Replacements might have multiple indirections. Resolve to the last - // indirection name: - let keys = replacements.keys().cloned().collect::>(); - for mut curr in keys { - let mut seen = BTreeSet::new(); - while let Some(to) = replacements.get(&curr) { - if !seen.insert(curr) { - panic!( - "Cycle detected among .set directives involving:\n {}", - seen.into_iter().format("\n ") - ) - } - curr = to.to_string(); - } - - for key in seen { - replacements.insert(key, curr.to_string()); - } - } - - replacements -} - -pub fn extract_label_offsets( - statements: &[Statement], -) -> BTreeMap<&str, usize> { - statements - .iter() - .enumerate() - .filter_map(|(i, s)| match s { - Statement::Label(l) => Some((l.as_str(), i)), - Statement::Directive(_, _) | Statement::Instruction(_, _) => None, - }) - .fold(BTreeMap::new(), |mut acc, (n, i)| { - if acc.insert(n, i).is_some() { - panic!("Duplicate label: {n}") - } - acc - }) -} - -pub fn references_in_statement( - statement: &Statement, -) -> BTreeSet<&str> { - let mut ret = BTreeSet::new(); - match statement { - Statement::Label(_) | Statement::Directive(_, _) => (), - Statement::Instruction(name, args) => { - ret.extend(A::get_references(name, args)); - } - }; - ret -} - -pub fn symbols_in_args(args: &[Argument]) -> Vec<&str> { - let mut ret = Vec::new(); - for arg in args { - arg.post_visit_expressions(&mut |expr| { - if let Expression::Symbol(sym) = expr { - ret.push(sym.as_str()); - } - }); - 
} - - ret -} - -fn basic_block_references_starting_from( - statements: &[Statement], -) -> (Vec<&str>, Vec<&str>) { - let mut seen_labels = vec![]; - let mut referenced_labels = BTreeSet::<&str>::new(); - iterate_basic_block::(statements, |s| { - if let Statement::Label(l) = s { - seen_labels.push(l.as_str()); - } else { - referenced_labels.extend(references_in_statement::(s)) - } - }); - (referenced_labels.into_iter().collect(), seen_labels) -} - -fn iterate_basic_block<'a, R: Register, F: FunctionOpKind, A: Architecture>( - statements: &'a [Statement], - mut fun: impl FnMut(&'a Statement), -) { - for s in statements { - fun(s); - if ends_control_flow::(s) { - break; - } - } -} - -fn ends_control_flow(s: &Statement) -> bool { - match s { - // The rust compiler allows functions that end in the panic handler (`begin_unwind`) to never return. - // We use this directive to identify the end of these non-returning functions. - Statement::Directive(dir, _) if dir == ".cfi_endproc" => true, - Statement::Instruction(instruction, _) => { - A::instruction_ends_control_flow(instruction.as_str()) - } - _ => false, - } -} - -fn apply_replacement_to_instruction( - statement: &mut Statement, - replacements: &BTreeMap<&str, &str>, -) { - match statement { - Statement::Label(_) | Statement::Directive(_, _) => (), - Statement::Instruction(_, args) => { - for a in args { - a.post_visit_expressions_mut(&mut |expr| { - if let Expression::Symbol(s) = expr { - replace(s, replacements); - } - }); - } - } - } -} - -fn apply_replacement_to_object(object: &mut Vec, replacements: &BTreeMap<&str, &str>) { - for value in object { - if let DataValue::Reference(reference) = value { - if let Some(replacement) = replacements.get(reference.as_str()) { - *value = DataValue::Reference(replacement.to_string()) - } - } - } -} - -fn replace(s: &mut String, replacements: &BTreeMap<&str, &str>) { - if let Some(r) = replacements.get(s.as_str()) { - *s = r.to_string(); - } -} diff --git 
a/asm-utils/src/utils.rs b/asm-utils/src/utils.rs deleted file mode 100644 index 8e8d7ea384..0000000000 --- a/asm-utils/src/utils.rs +++ /dev/null @@ -1,66 +0,0 @@ -use crate::ast::{Argument, Expression, FunctionOpKind, Register}; - -pub fn next_aligned(val: usize, alignment: usize) -> usize { - // Alignment will probably always be a power of two, which can be aligned in - // a much faster bitwise operation. But then we would have to assert!() it, - // so it is just better to use the generic version. - ((val + (alignment - 1)) / alignment) * alignment -} - -/// Padding to next alignment boundary, in bytes. -pub fn alignment_size(from: usize, alignment: usize) -> usize { - let dest = next_aligned(from, alignment); - dest - from -} - -/// Split an slice as before and after the first occurrence of an element. -/// -/// The second return value is None if the element is not found. -pub fn split_at_first<'a, T: Eq>(s: &'a [T], elem: &T) -> (&'a [T], Option<&'a [T]>) { - match s.iter().position(|e| e == elem) { - Some(idx) => (&s[..idx], Some(&s[(idx + 1)..])), - None => (s, None), - } -} - -/// Find the position of the next given element in an iterable. -pub fn find_position>( - seq: impl IntoIterator, - elem: T, -) -> Option { - seq.into_iter().position(|e| e == elem) -} - -pub fn quote(s: &str) -> String { - // TODO more things to quote - format!("\"{}\"", s.replace('\\', "\\\\").replace('\"', "\\\"")) -} - -pub fn escape_label(l: &str) -> String { - // TODO make this proper - l.replace('.', "_dot_").replace('/', "_slash_") -} - -pub fn argument_to_symbol(x: &Argument) -> Option<&str> { - if let Argument::Expression(Expression::Symbol(symbol)) = x { - Some(symbol) - } else { - None - } -} - -pub fn argument_to_number(x: &Argument) -> Option { - if let Argument::Expression(expr) = x { - Some(expression_to_number(expr)?) 
- } else { - None - } -} - -pub fn expression_to_number(expr: &Expression) -> Option { - if let Expression::Number(n) = expr { - Some(*n as u32) - } else { - None - } -} diff --git a/cli-rs/src/main.rs b/cli-rs/src/main.rs index 79264d29f3..0db51a00ed 100644 --- a/cli-rs/src/main.rs +++ b/cli-rs/src/main.rs @@ -12,8 +12,10 @@ use powdr_pipeline::Pipeline; use powdr_riscv_executor::ProfilerOptions; use std::ffi::OsStr; -use std::{borrow::Cow, io::Write, path::Path}; -use std::{fs, io}; +use std::{ + io::{self, Write}, + path::Path, +}; use strum::{Display, EnumString, EnumVariantNames}; #[derive(Clone, EnumString, EnumVariantNames, Display)] @@ -62,37 +64,6 @@ enum Commands { #[arg(long)] coprocessors: Option, - /// Convert from the assembly files instead of the ELF executable. - #[arg(short, long)] - #[arg(default_value_t = false)] - asm: bool, - - /// Run a long execution in chunks (Experimental and not sound!) - #[arg(short, long)] - #[arg(default_value_t = false)] - continuations: bool, - }, - /// Compiles riscv assembly to powdr assembly. - RiscvAsm { - /// Input files - #[arg(required = true)] - files: Vec, - - /// The field to use - #[arg(long)] - #[arg(default_value_t = FieldArgument::Gl)] - #[arg(value_parser = clap_enum_variants!(FieldArgument))] - field: FieldArgument, - - /// Directory for output files. - #[arg(short, long)] - #[arg(default_value_t = String::from("."))] - output_directory: String, - - /// Comma-separated list of coprocessors. - #[arg(long)] - coprocessors: Option, - /// Run a long execution in chunks (Experimental and not sound!) 
#[arg(short, long)] #[arg(default_value_t = false)] @@ -224,36 +195,12 @@ fn run_command(command: Commands) { field, output_directory, coprocessors, - asm, continuations, } => { call_with_field!(compile_rust::( &file, Path::new(&output_directory), coprocessors, - !asm, - continuations - )) - } - Commands::RiscvAsm { - files, - field, - output_directory, - coprocessors, - continuations, - } => { - assert!(!files.is_empty()); - let name = if files.len() == 1 { - Cow::Owned(files[0].clone()) - } else { - Cow::Borrowed("output") - }; - - call_with_field!(compile_riscv_asm::( - &name, - files.into_iter(), - Path::new(&output_directory), - coprocessors, continuations )) } @@ -317,7 +264,6 @@ fn compile_rust( file_name: &str, output_dir: &Path, coprocessors: Option, - via_elf: bool, continuations: bool, ) -> Result<(), Vec> { let mut runtime = match coprocessors { @@ -336,49 +282,8 @@ fn compile_rust( runtime = runtime.with_poseidon_for_continuations(); } - powdr_riscv::compile_rust::( - file_name, - output_dir, - true, - &runtime, - via_elf, - continuations, - None, - ) - .ok_or_else(|| vec!["could not compile rust".to_string()])?; - - Ok(()) -} - -#[allow(clippy::too_many_arguments)] -fn compile_riscv_asm( - original_file_name: &str, - file_names: impl Iterator, - output_dir: &Path, - coprocessors: Option, - continuations: bool, -) -> Result<(), Vec> { - let runtime = match coprocessors { - Some(list) => { - powdr_riscv::Runtime::try_from(list.split(',').collect::>().as_ref()).unwrap() - } - None => powdr_riscv::Runtime::base(), - }; - - powdr_riscv::compile_riscv_asm_bundle::( - original_file_name, - file_names - .map(|name| { - let contents = fs::read_to_string(&name).unwrap(); - (name, contents) - }) - .collect(), - output_dir, - true, - &runtime, - continuations, - ) - .ok_or_else(|| vec!["could not compile RISC-V assembly".to_string()])?; + powdr_riscv::compile_rust::(file_name, output_dir, true, &runtime, continuations, None) + .ok_or_else(|| vec!["could not 
compile rust".to_string()])?; Ok(()) } diff --git a/asm-utils/Cargo.toml b/isa-utils/Cargo.toml similarity index 50% rename from asm-utils/Cargo.toml rename to isa-utils/Cargo.toml index f4d93320c4..91d49bf52e 100644 --- a/asm-utils/Cargo.toml +++ b/isa-utils/Cargo.toml @@ -1,14 +1,8 @@ [package] -name = "powdr-asm-utils" -description = "powdr utilities for LLVM / RISCV assembly" +name = "powdr-isa-utils" +description = "powdr utilities for translating from native ISA code (RISCV for now)" version = { workspace = true } edition = { workspace = true } license = { workspace = true } homepage = { workspace = true } repository = { workspace = true } - -[dependencies] -itertools = "0.13" - -[lints.clippy] -uninlined_format_args = "deny" diff --git a/isa-utils/src/lib.rs b/isa-utils/src/lib.rs new file mode 100644 index 0000000000..238ff7ff98 --- /dev/null +++ b/isa-utils/src/lib.rs @@ -0,0 +1,21 @@ +/// A single 32-bit data value. +pub enum SingleDataValue { + /// A literal value. + Value(u32), + /// The value of a pointer to a text label. Since there may not be a + /// 1-to-1 correspondence between native ISAs and Powdr ASM instructions, + /// this is passed unresolved to the code generator. + LabelReference(String), + /// Currently not supported.
+ Offset(String, String), +} + +pub fn quote(s: &str) -> String { + // TODO more things to quote + format!("\"{}\"", s.replace('\\', "\\\\").replace('\"', "\\\"")) +} + +pub fn escape_label(l: &str) -> String { + // TODO make this proper + l.replace('.', "_dot_").replace('/', "_slash_") +} diff --git a/pipeline/Cargo.toml b/pipeline/Cargo.toml index 67a89ec506..9e29aa376b 100644 --- a/pipeline/Cargo.toml +++ b/pipeline/Cargo.toml @@ -17,7 +17,6 @@ estark-polygon = ["powdr-backend/estark-polygon"] powdr-airgen.workspace = true powdr-analysis.workspace = true powdr-asm-to-pil.workspace = true -powdr-asm-utils.workspace = true powdr-ast.workspace = true powdr-backend.workspace = true powdr-executor.workspace = true diff --git a/riscv/Cargo.toml b/riscv/Cargo.toml index f04f3ef04c..9d5a611ab0 100644 --- a/riscv/Cargo.toml +++ b/riscv/Cargo.toml @@ -15,7 +15,7 @@ estark-polygon = ["powdr-pipeline/estark-polygon"] [dependencies] powdr-ast.workspace = true -powdr-asm-utils.workspace = true +powdr-isa-utils.workspace = true powdr-executor.workspace = true powdr-linker.workspace = true powdr-number.workspace = true diff --git a/riscv/benches/executor_benchmark.rs b/riscv/benches/executor_benchmark.rs index 9134223028..56a7891157 100644 --- a/riscv/benches/executor_benchmark.rs +++ b/riscv/benches/executor_benchmark.rs @@ -2,7 +2,7 @@ use ::powdr_pipeline::Pipeline; use powdr_number::GoldilocksField; use powdr_riscv::{ - asm, compile_rust_crate_to_riscv_asm, continuations::bootloader::default_input, Runtime, + compile_rust_crate_to_riscv, continuations::bootloader::default_input, elf, Runtime, }; use criterion::{criterion_group, criterion_main, Criterion}; @@ -16,9 +16,9 @@ fn executor_benchmark(c: &mut Criterion) { // Keccak let tmp_dir = Temp::new_dir().unwrap(); - let riscv_asm_files = - compile_rust_crate_to_riscv_asm("./tests/riscv_data/keccak/Cargo.toml", &tmp_dir, None); - let contents = asm::compile::(riscv_asm_files, &Runtime::base(), false); + let executable = + 
compile_rust_crate_to_riscv("./tests/riscv_data/keccak/Cargo.toml", &tmp_dir, None); + let contents = elf::translate::(&executable, &Runtime::base(), false); let mut pipeline = Pipeline::::default().from_asm_string(contents, None); pipeline.compute_optimized_pil().unwrap(); pipeline.compute_fixed_cols().unwrap(); @@ -28,13 +28,10 @@ fn executor_benchmark(c: &mut Criterion) { }); // The first chunk of `many_chunks`, with Poseidon co-processor & bootloader - let riscv_asm_files = compile_rust_crate_to_riscv_asm( - "./tests/riscv_data/many_chunks/Cargo.toml", - &tmp_dir, - None, - ); - let contents = asm::compile::( - riscv_asm_files, + let executable = + compile_rust_crate_to_riscv("./tests/riscv_data/many_chunks/Cargo.toml", &tmp_dir, None); + let contents = elf::translate::( + &executable, &Runtime::base().with_poseidon_for_continuations(), true, ); diff --git a/riscv/src/asm/disambiguator.rs b/riscv/src/asm/disambiguator.rs deleted file mode 100644 index db33bb1620..0000000000 --- a/riscv/src/asm/disambiguator.rs +++ /dev/null @@ -1,155 +0,0 @@ -use std::collections::{HashMap, HashSet}; - -use itertools::Itertools; - -use super::{Argument, Expression, Statement}; - -/// Disambiguates the collection of assembly files and concatenates it to a single list of statements. -/// Also disambiguates file ids (debugging information) and returns a list of all files with new IDs. -pub fn disambiguate( - mut assemblies: Vec<(String, Vec)>, -) -> (Vec, Vec<(i64, String, String)>) { - let globals = assemblies - .iter() - .flat_map(|(_, statements)| extract_globals(statements)) - .collect::>(); - - // Disambiguates the debug file references. 
- let file_ids = disambiguate_file_ids(&mut assemblies); - - ( - assemblies - .into_iter() - .map(|(name, mut statements)| { - disambiguate_file(&name, &mut statements, &globals); - statements - }) - .concat(), - file_ids, - ) -} - -fn extract_globals(statements: &[Statement]) -> HashSet { - statements - .iter() - .flat_map(|s| { - if let Statement::Directive(name, args) = s { - if name == ".globl" { - return args - .iter() - .map(|a| { - if let Argument::Expression(Expression::Symbol(s)) = a { - s.clone() - } else { - panic!("Invalid .globl directive: {s}"); - } - }) - // TODO possible without collect? - .collect(); - } - } - vec![] - }) - .collect() -} - -fn disambiguate_file(file_name: &str, statements: &mut [Statement], globals: &HashSet) { - let prefix = file_name.replace('-', "_dash_"); - for s in statements { - match s { - Statement::Label(l) => disambiguate_symbol_if_needed(l, &prefix, globals), - Statement::Directive(_, args) | Statement::Instruction(_, args) => { - for arg in args.iter_mut() { - disambiguate_argument_if_needed(arg, &prefix, globals); - } - } - } - } -} - -fn disambiguate_argument_if_needed(arg: &mut Argument, prefix: &str, globals: &HashSet) { - arg.post_visit_expressions_mut(&mut |expr| { - if let Expression::Symbol(sym) = expr { - disambiguate_symbol_if_needed(sym, prefix, globals); - } - }); -} - -fn disambiguate_symbol_if_needed(s: &mut String, prefix: &str, globals: &HashSet) { - if !s.starts_with('@') && !globals.contains(s.as_str()) { - *s = format!("{prefix}__{s}"); - } -} - -fn disambiguate_file_ids( - assemblies: &mut [(String, Vec)], -) -> Vec<(i64, String, String)> { - let debug_file_ids = assemblies - .iter() - .flat_map(|(name, statements)| extract_file_ids(name, statements)) - .collect::>(); - // ensure the ids are densely packed: - let debug_file_id_mapping = { - let mut map = HashMap::new(); - for (asm_name, file_id, ..) 
in debug_file_ids.iter() { - map.insert((asm_name.to_string(), *file_id), map.len() as i64 + 1); - } - map - }; - let new_debug_file_ids = debug_file_ids - .into_iter() - .map(|(asm_file, id, dir, file)| { - ( - debug_file_id_mapping[&(asm_file.to_string(), id)], - dir, - file, - ) - }) - .collect(); - assemblies.iter_mut().for_each(|(n, statements)| { - statements - .iter_mut() - .for_each(|s| replace_file_refs(n, s, &debug_file_id_mapping)) - }); - new_debug_file_ids -} - -/// Extracts all debug file IDs from the list of statements in the given assembly file. -fn extract_file_ids<'a>( - name: &'a str, - statements: &[Statement], -) -> Vec<(&'a str, i64, String, String)> { - statements - .iter() - .filter_map(|s| match s { - Statement::Directive(directive, args) if directive == ".file" => { - if let [ - Argument::Expression(Expression::Number(file_nr)), - Argument::StringLiteral(dir), - Argument::StringLiteral(file), - ] = &args[..] { - Some((name, *file_nr, std::str::from_utf8(dir).unwrap().to_string(), std::str::from_utf8(file).unwrap().to_string())) - } else { - None - } - } - _ => None, - }) - .unique() - .sorted() - .collect() -} - -fn replace_file_refs( - name: &str, - statement: &mut Statement, - id_mapping: &HashMap<(String, i64), i64>, -) { - if let Statement::Directive(directive, args) = statement { - if let (".file" | ".loc", [Argument::Expression(Expression::Number(file_nr)), ..]) = - (directive.as_str(), &mut args[..]) - { - *file_nr = id_mapping[&(name.to_string(), *file_nr)]; - } - } -} diff --git a/riscv/src/asm/mod.rs b/riscv/src/asm/mod.rs deleted file mode 100644 index dc7a749de8..0000000000 --- a/riscv/src/asm/mod.rs +++ /dev/null @@ -1,581 +0,0 @@ -use std::collections::{BTreeMap, BTreeSet, HashSet}; - -use itertools::Itertools; -use parser::RiscParser; -use powdr_asm_utils::{ - ast::{BinaryOpKind, UnaryOpKind}, - data_parser::{self, DataObjects}, - data_storage::store_data_objects, - parser::parse_asm, - reachability::{self, symbols_in_args}, 
- utils::{argument_to_number, argument_to_symbol, expression_to_number}, - Architecture, -}; -use powdr_number::FieldElement; - -use crate::{ - code_gen::{ - self, FunctionKind, InstructionArgs, MemEntry, Register, RiscVProgram, SourceFileInfo, - }, - Runtime, -}; - -mod disambiguator; -mod parser; - -type Statement = powdr_asm_utils::ast::Statement; -type Argument = powdr_asm_utils::ast::Argument; -type Expression = powdr_asm_utils::ast::Expression; - -struct AsmProgram { - file_ids: Vec<(i64, String, String)>, - mem_entries: Option>, - statements: Vec, -} - -const START_FUNCTION: &str = "__runtime_start"; - -impl RiscVProgram for AsmProgram { - fn take_source_files_info(&mut self) -> impl Iterator { - self.file_ids.iter().map(|(id, dir, file)| SourceFileInfo { - id: *id as u32, - dir, - file, - }) - } - - fn take_initial_mem(&mut self) -> impl Iterator { - std::mem::take(&mut self.mem_entries).unwrap().into_iter() - } - - fn take_executable_statements( - &mut self, - ) -> impl Iterator, impl InstructionArgs>> { - self.statements.iter().filter_map(process_statement) - } - - fn start_function(&self) -> impl AsRef { - START_FUNCTION - } -} - -impl InstructionArgs for &[Argument] { - type Error = &'static str; - - fn l(&self) -> Result, ::Error> { - const ERR: &str = "Expected: label"; - match self { - [l] => Ok(argument_to_symbol(l).ok_or(ERR)?), - _ => Err(ERR), - } - } - - fn r(&self) -> Result { - match self { - [Argument::Register(r1)] => Ok(*r1), - _ => Err("Expected: register"), - } - } - - fn rri(&self) -> Result<(Register, Register, u32), &'static str> { - const ERR: &str = "Expected: register, register, immediate"; - match self { - [Argument::Register(r1), Argument::Register(r2), n] => { - Ok((*r1, *r2, argument_to_number(n).ok_or(ERR)?)) - } - _ => Err(ERR), - } - } - - fn rrr(&self) -> Result<(Register, Register, Register), &'static str> { - match self { - [Argument::Register(r1), Argument::Register(r2), Argument::Register(r3) - | Argument::RegOffset(None 
| Some(Expression::Number(0)), r3)] => Ok((*r1, *r2, *r3)), - _ => Err("Expected: register, register, register"), - } - } - - fn rrr2(&self) -> Result<(Register, Register, Register), &'static str> { - // When reading from assembly, this is identical to rrr - self.rrr() - } - - fn ri(&self) -> Result<(Register, u32), &'static str> { - const ERR: &str = "Expected: register, immediate"; - match self { - [Argument::Register(r1), n] => Ok((*r1, argument_to_number(n).ok_or(ERR)?)), - _ => Err(ERR), - } - } - - fn rr(&self) -> Result<(Register, Register), &'static str> { - match self { - [Argument::Register(r1), Argument::Register(r2) - | Argument::RegOffset(None | Some(Expression::Number(0)), r2)] => Ok((*r1, *r2)), - _ => Err("Expected: register, register"), - } - } - - fn rrl( - &self, - ) -> Result< - (code_gen::Register, code_gen::Register, impl AsRef), - ::Error, - > { - const ERR: &str = "Expected: register, register, label"; - match self { - [Argument::Register(r1), Argument::Register(r2), l] => { - Ok((*r1, *r2, argument_to_symbol(l).ok_or(ERR)?)) - } - _ => Err(ERR), - } - } - - fn rl( - &self, - ) -> Result<(code_gen::Register, impl AsRef), ::Error> { - const ERR: &str = "Expected: register, label"; - match self { - [Argument::Register(r1), l] => Ok((*r1, argument_to_symbol(l).ok_or(ERR)?)), - _ => Err(ERR), - } - } - - fn rro(&self) -> Result<(Register, Register, u32), &'static str> { - const ERR: &str = "Expected: register, offset(register)"; - - match self { - [Argument::Register(r1), Argument::RegOffset(off, r2)] => Ok(( - *r1, - *r2, - expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))).ok_or(ERR)?, - )), - [Argument::Register(r1), Argument::Expression(off)] => { - Ok((*r1, Register::new(0), expression_to_number(off).ok_or(ERR)?)) - } - _ => Err(ERR), - } - } - - fn empty(&self) -> Result<(), &'static str> { - match self { - [] => Ok(()), - _ => Err("Expected: no arguments"), - } - } -} - -/// Compiles riscv assembly to a powdr assembly 
file. Adds required library routines. -pub fn compile( - assemblies: BTreeMap, - runtime: &Runtime, - with_bootloader: bool, -) -> String { - let asm_program = compile_internal(assemblies); - - code_gen::translate_program::(asm_program, runtime, with_bootloader) -} - -fn compile_internal(mut assemblies: BTreeMap) -> AsmProgram { - // stack grows towards zero - let stack_start = 0x10000000; - // data grows away from zero - let data_start = 0x10000100; - - assert!(assemblies - .insert("__runtime".to_string(), global_declarations(stack_start)) - .is_none()); - - // TODO remove unreferenced files. - let (mut statements, file_ids) = disambiguator::disambiguate( - assemblies - .into_iter() - .map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents))) - .collect(), - ); - let DataObjects { - sections: mut data_sections, - adhoc_symbols: mut data_positions, - } = data_parser::extract_data_objects(&statements); - - // Reduce to the code that is actually reachable from main - // (and the objects that are referred from there) - let data_labels = reachability::filter_reachable_from::<_, _, RiscvArchitecture>( - START_FUNCTION, - &mut statements, - &mut data_sections, - ); - - // Replace dynamic references to code labels - replace_dynamic_label_references(&mut statements, &data_labels); - - let mut mem_entries = Vec::new(); - store_data_objects( - data_sections, - data_start, - &mut |label, addr, value| { - mem_entries.push(MemEntry { label, addr, value }); - }, - &mut data_positions, - ); - - let statements = substitute_symbols_with_values(statements, &data_positions); - - AsmProgram { - file_ids, - mem_entries: Some(mem_entries), - statements, - } -} - -/// Replace certain patterns of references to code labels by -/// special instructions. We ignore any references to data objects -/// because they will be handled differently. 
-fn replace_dynamic_label_references(statements: &mut Vec, data_labels: &HashSet<&str>) { - /* - Find patterns of the form - lui a0, %hi(LABEL) - addi s10, a0, %lo(LABEL) - - - turn this into the pseudoinstruction - li s10, LABEL - which is then turned into - - s10 <== load_label(LABEL) - - It gets complicated by the fact that sometimes, labels - and debugging directives occur between the two statements - matching that pattern... - */ - let instruction_indices = statements - .iter() - .enumerate() - .filter_map(|(i, s)| match s { - Statement::Instruction(_, _) => Some(i), - _ => None, - }) - .collect::>(); - - let mut to_delete = BTreeSet::default(); - for (i1, i2) in instruction_indices.into_iter().tuple_windows() { - if let Some(r) = - replace_dynamic_label_reference(&statements[i1], &statements[i2], data_labels) - { - to_delete.insert(i1); - statements[i2] = r; - } - } - - let mut i = 0; - statements.retain(|_| (!to_delete.contains(&i), i += 1).0); -} - -fn replace_dynamic_label_reference( - s1: &Statement, - s2: &Statement, - data_labels: &HashSet<&str>, -) -> Option { - let Statement::Instruction(instr1, args1) = s1 else { - return None; - }; - let Statement::Instruction(instr2, args2) = s2 else { - return None; - }; - if instr1.as_str() != "lui" || instr2.as_str() != "addi" { - return None; - }; - let [Argument::Register(r1), Argument::Expression(Expression::FunctionOp(FunctionKind::HiDataRef, expr1))] = - &args1[..] - else { - return None; - }; - // Maybe should try to reduce expr1 and expr2 before comparing deciding it is a pure symbol? - let Expression::Symbol(label1) = expr1.as_ref() else { - return None; - }; - let [Argument::Register(r2), Argument::Register(r3), Argument::Expression(Expression::FunctionOp(FunctionKind::LoDataRef, expr2))] = - &args2[..] 
- else { - return None; - }; - let Expression::Symbol(label2) = expr2.as_ref() else { - return None; - }; - if r1 != r3 || label1 != label2 || data_labels.contains(label1.as_str()) { - return None; - } - Some(Statement::Instruction( - "li".to_string(), - vec![ - Argument::Register(*r2), - Argument::Expression(Expression::Symbol(label1.clone())), - ], - )) -} - -fn substitute_symbols_with_values( - mut statements: Vec, - data_positions: &BTreeMap, -) -> Vec { - for s in &mut statements { - let Statement::Instruction(_name, args) = s else { - continue; - }; - for arg in args { - arg.post_visit_expressions_mut(&mut |expression| match expression { - Expression::Number(_) => {} - Expression::Symbol(symb) => { - if let Some(pos) = data_positions.get(symb) { - *expression = Expression::Number(*pos as i64) - } - } - Expression::UnaryOp(op, subexpr) => { - if let Expression::Number(num) = subexpr.as_ref() { - let result = match op { - UnaryOpKind::BitwiseNot => !num, - UnaryOpKind::Negation => -num, - }; - *expression = Expression::Number(result); - }; - } - Expression::BinaryOp(op, subexprs) => { - if let (Expression::Number(a), Expression::Number(b)) = - (&subexprs[0], &subexprs[1]) - { - let result = match op { - BinaryOpKind::Or => a | b, - BinaryOpKind::Xor => a ^ b, - BinaryOpKind::And => a & b, - BinaryOpKind::LeftShift => a << b, - BinaryOpKind::RightShift => a >> b, - BinaryOpKind::Add => a + b, - BinaryOpKind::Sub => a - b, - BinaryOpKind::Mul => a * b, - BinaryOpKind::Div => a / b, - BinaryOpKind::Mod => a % b, - }; - *expression = Expression::Number(result); - } - } - Expression::FunctionOp(op, subexpr) => { - if let Expression::Number(num) = subexpr.as_ref() { - let result = match op { - FunctionKind::HiDataRef => num >> 12, - FunctionKind::LoDataRef => num & 0xfff, - }; - *expression = Expression::Number(result); - }; - } - }); - } - } - statements -} - -fn process_statement(s: &Statement) -> Option> { - match s { - Statement::Label(l) => 
Some(code_gen::Statement::Label(l)), - Statement::Directive(directive, args) => match (directive.as_str(), &args[..]) { - ( - ".loc", - [Argument::Expression(Expression::Number(file)), Argument::Expression(Expression::Number(line)), Argument::Expression(Expression::Number(column)), ..], - ) => Some(code_gen::Statement::DebugLoc { - file: *file as u64, - line: *line as u64, - col: *column as u64, - }), - (".file", _) => { - // We ignore ".file" directives because they have been extracted to the top. - None - } - (".size", _) => { - // We ignore ".size" directives - None - } - (".option", _) => { - // We ignore ".option" directives - None - } - _ if directive.starts_with(".cfi_") => None, - _ => panic!( - "Leftover directive in code: {directive} {}", - args.iter().format(", ") - ), - }, - Statement::Instruction(instr, args) => { - // TODO: maybe restore this debug info - /* - let stmt_str = format!("{s}"); - // remove indentation and trailing newline - let stmt_str = &stmt_str[2..(stmt_str.len() - 1)]; - let mut ret = vec![format!(" .debug insn \"{stmt_str}\";")]; - */ - Some(code_gen::Statement::Instruction { - op: instr, - args: args.as_slice(), - }) - } - } -} - -struct RiscvArchitecture {} - -impl Architecture for RiscvArchitecture { - fn instruction_ends_control_flow(instr: &str) -> bool { - match instr { - "li" | "lui" | "la" | "mv" | "add" | "addi" | "sub" | "neg" | "mul" | "mulh" - | "mulhu" | "mulhsu" | "divu" | "remu" | "xor" | "xori" | "and" | "andi" | "or" - | "ori" | "not" | "slli" | "sll" | "srli" | "srl" | "srai" | "seqz" | "snez" - | "slt" | "slti" | "sltu" | "sltiu" | "sgtz" | "beq" | "beqz" | "bgeu" | "bltu" - | "blt" | "bge" | "bltz" | "blez" | "bgtz" | "bgez" | "bne" | "bnez" | "jal" - | "jalr" | "call" | "ecall" | "ebreak" | "lw" | "lb" | "lbu" | "lh" | "lhu" | "sw" - | "sh" | "sb" | "nop" | "fence" | "amoadd.w" | "amoadd.w.aq" | "amoadd.w.rl" - | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" | "sc.w" - | "sc.w.aq" | "sc.w.rl" | 
"sc.w.aqrl" => false, - "j" | "jr" | "tail" | "ret" | "unimp" => true, - _ => { - panic!("Unknown instruction: {instr}"); - } - } - } - - fn get_references< - 'a, - R: powdr_asm_utils::ast::Register, - F: powdr_asm_utils::ast::FunctionOpKind, - >( - instr: &str, - args: &'a [powdr_asm_utils::ast::Argument], - ) -> Vec<&'a str> { - // fence arguments are not symbols, they are like reserved - // keywords affecting the instruction behavior - if instr.starts_with("fence") { - Vec::new() - } else { - symbols_in_args(args) - } - } -} - -/// Maps an instruction in .insn syntax to Statement::Instruction() in the expected format. -/// -/// See https://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dFormats.html -pub fn map_insn_i( - opcode6: Expression, - func3: Expression, - rd: Register, - rs1: Register, - simm12: Expression, -) -> Statement { - let (Expression::Number(opcode6), Expression::Number(func3)) = (opcode6, func3) else { - panic!("Only literal opcode and function are supported in .insn syntax"); - }; - - // These are almost all instructions in RISC-V Instruction Set Manual that - // we are supposed to implement and roughly fits the pattern of the I-type - // instruction. Only "csr*i" instructions are missing. - - // First we try to match the instructions that uses the I-type encoding - // ordinarily, i.e. 
where all fields are what they are supposed to be: - let name = match (opcode6, func3) { - (0b1100111, 0b000) => "jalr", - (0b0000011, 0b000) => "lb", - (0b0000011, 0b001) => "lh", - (0b0000011, 0b010) => "lw", - (0b0000011, 0b100) => "lbu", - (0b0000011, 0b101) => "lhu", - (0b0010011, 0b000) => "addi", - (0b0010011, 0b010) => "slti", - (0b0010011, 0b011) => "sltiu", - (0b0010011, 0b100) => "xori", - (0b0010011, 0b110) => "ori", - (0b0010011, 0b111) => "andi", - (0b1110011, 0b001) => "csrrw", - (0b1110011, 0b010) => "csrrs", - (0b1110011, 0b011) => "csrrc", - // won't interpret "csr*i" instructions because it is too weird to - // encode an immediate as a register - opfunc => { - // We now try the instructions that take certain liberties with the - // I-type encoding, and don't use the standard arguments for it. - let name = match opfunc { - (0b0001111, 0b000) => "fence", - (0b0001111, 0b001) => "fence.i", - (0b1110011, 0b000) => { - let Expression::Number(simm12) = simm12 else { - panic!( - "Only literal simm12 is supported for ecall and ebreak instructions" - ); - }; - match simm12 { - 0 => "ecall", - 1 => "ebreak", - _ => panic!("unknown instruction"), - } - } - _ => panic!("unsupported .insn instruction"), - }; - return Statement::Instruction(name.to_string(), Vec::new()); - } - }; - - let args = vec![ - Argument::Register(rd), - Argument::Register(rs1), - Argument::Expression(simm12), - ]; - - Statement::Instruction(name.to_string(), args) -} - -fn global_declarations(stack_start: u32) -> String { - [ - "__divdi3", - "__udivdi3", - "__udivti3", - "__divdf3", - "__muldf3", - "__moddi3", - "__umoddi3", - "__umodti3", - "__eqdf2", - "__ltdf2", - "__nedf2", - "__unorddf2", - "__floatundidf", - "__extendsfdf2", - "memcpy", - "memmove", - "memset", - "memcmp", - "bcmp", - "strlen", - ] - .map(|n| format!(".globl {n}@plt\n.globl {n}\n.set {n}@plt, {n}\n")) - .join("\n\n") - + &[("__rust_alloc_error_handler", "__rg_oom")] - .map(|(n, m)| format!(".globl {n}\n.set {n}, 
{m}\n")) - .join("\n\n") - + - // some extra symbols expected by rust code: - // - __rust_no_alloc_shim_is_unstable: compilation time acknowledgment - // that this feature is unstable. - // - __rust_alloc_error_handler_should_panic: needed by the default - // alloc error handler, not sure why it's not present in the asm. - // https://github.com/rust-lang/rust/blob/ae9d7b0c6434b27e4e2effe8f05b16d37e7ef33f/library/alloc/src/alloc.rs#L415 - // - __stack_start: the start of the stack - // - __global_pointer$: a RISC-V special symbol that we actually don't - // use, but we define for compatibility with programs that expect it. - &format!(r".data -.globl __rust_alloc_error_handler_should_panic -__rust_alloc_error_handler_should_panic: .byte 0 -.globl __rust_no_alloc_shim_is_unstable -__rust_no_alloc_shim_is_unstable: .byte 0 -.globl __powdr_stack_start -.set __powdr_stack_start, {stack_start} -.globl __global_pointer$ -.set __global_pointer$, 0 - -.text -") -} diff --git a/riscv/src/asm/parser.rs b/riscv/src/asm/parser.rs deleted file mode 100644 index 6b4b10a53e..0000000000 --- a/riscv/src/asm/parser.rs +++ /dev/null @@ -1,35 +0,0 @@ -use lalrpop_util::*; - -use crate::code_gen::{FunctionKind, Register}; -use powdr_parser_util::handle_parse_error; - -use super::Statement; - -lalrpop_mod!( - #[allow(dead_code)] - #[allow(clippy::all)] - #[allow(clippy::uninlined_format_args)] - riscv_asm, - "/asm/riscv_asm.rs" -); - -pub struct RiscParser { - parser: riscv_asm::StatementsParser, -} - -impl Default for RiscParser { - fn default() -> Self { - Self { - parser: riscv_asm::StatementsParser::new(), - } - } -} - -impl powdr_asm_utils::parser::Parser for RiscParser { - fn parse(&self, input: &str) -> Result, String> { - self.parser.parse(input).map_err(|err| { - handle_parse_error(err, None, input).output_to_stderr(); - panic!("RISCV assembly parse error"); - }) - } -} diff --git a/riscv/src/asm/riscv_asm.lalrpop b/riscv/src/asm/riscv_asm.lalrpop deleted file mode 100644 index 
12e63a7273..0000000000 --- a/riscv/src/asm/riscv_asm.lalrpop +++ /dev/null @@ -1,200 +0,0 @@ -//! Much of this can be reused between front-ends, excluding elements like -//! [FunctionKind](crate::FunctionKind) which needs to be parsed in an -//! front-end specific way. -//! -//! However it's currently difficult/impossible to share one larlpop grammar in -//! multiple places while adding slight tweaks. This requires a change to -//! larlpop like: -//! https://github.com/lalrpop/lalrpop/issues/42#issuecomment-288973232 -//! -//! There are some messier solutions like, appending two larlpop files together -//! to create the "slightly tweaked" grammar, but this is not ideal. - -use std::str::FromStr; -use powdr_asm_utils::ast::{unescape_string, BinaryOpKind as BOp, UnaryOpKind as UOp, - new_binary_op as bin_op, new_unary_op as un_op, new_function_op as fn_op}; -use super::super::{Argument, Register, Statement, FunctionKind as FOp, Expression, map_insn_i}; -use powdr_parser_util::Error; - -grammar; - -extern { - type Error = Error; -} - -match { - r"\s*" => { }, - r"#[^\n\r]*[\n\r]*" => { }, // Skip `# comments` - r"x[0-9]", - r"x1[0-9]", - r"x2[0-9]", - r"x3[0-1]", - r"a[0-7]", - r"s[0-1]", - r"s[2-9]", - r"s1[0-1]", - r"t[0-2]", - r"t[3-6]", - _, -} - -pub Statements: Vec = { - => ls.into_iter().flatten().chain(s.into_iter()).collect(), -} - -LabelOrStatement: Option = { - Label => Some(<>), - ";" => <> -} - -Label: Statement = { - ":" => Statement::Label(<>), - ":" => Statement::Label(<>.to_string()) -} - -Statement: Statement = { - SimpleDirective, - Directive, - Instruction, -} - -SimpleDirective: Statement = { - SimpleDirectiveName SimpleArgument* => Statement::Directive(<>), -} - -SimpleDirectiveName: String = { - ".file" => <>.to_string(), - ".loc" => <>.to_string(), -} - -Directive: Statement = { - => Statement::Directive(<>) -} - -Instruction: Statement = { - , - => Statement::Instruction(<>) -} - -InsnDirective: Statement = { - ".insn" "i" "," "," "," "," => 
map_insn_i(opcode6, func3, rd, rs1, simm12), - ".insn" "i" "," "," "," "(" ")" => map_insn_i(opcode6, func3, rd, rs1, simm12) - // TODO: implement the other kinds of .insn instructions. - // See https://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dFormats.html -} - -Arguments: Vec = { - => vec![], - "," )*> => { list.push(end); list } -} - -SimpleArgument: Argument = { - StringLiteral => Argument::StringLiteral(<>), - Symbol => Argument::Expression(Expression::Symbol(<>)), - Number => Argument::Expression(Expression::Number(<>)), -} - -Argument: Argument = { - Register => Argument::Register(<>), - OffsetRegister, - StringLiteral => Argument::StringLiteral(<>), - Expression => Argument::Expression(<>), -} - -Register: Register = { - r"x[0-9]" => Register::new(<>[1..].parse().unwrap()), - r"x1[0-9]" => Register::new(<>[1..].parse().unwrap()), - r"x2[0-9]" => Register::new(<>[1..].parse().unwrap()), - r"x3[0-1]" => Register::new(<>[1..].parse().unwrap()), - "zero" => Register::new(0), - "ra" => Register::new(1), - "sp" => Register::new(2), - "gp" => Register::new(3), - "tp" => Register::new(4), - r"a[0-7]" => Register::new(10 + <>[1..].parse::().unwrap()), - "fp" => Register::new(8), - r"s[0-1]" => Register::new(8 + <>[1..].parse::().unwrap()), - r"s[2-9]" => Register::new(16 + <>[1..].parse::().unwrap()), - r"s1[0-1]" => Register::new(16 + <>[1..].parse::().unwrap()), - r"t[0-2]" => Register::new(5 + <>[1..].parse::().unwrap()), - r"t[3-6]" => Register::new(25 + <>[1..].parse::().unwrap()), -} - -OffsetRegister: Argument = { - "(" ")" => Argument::RegOffset(<>), -} - -Expression: Expression = { - ExprBinaryOr, -} - -ExprBinaryOr: Expression = { - "|" => bin_op(BOp::Or, l, r), - ExprBinaryXor, -} - -ExprBinaryXor: Expression = { - "^" => bin_op(BOp::Xor, l, r), - ExprBinaryAnd, -} - -ExprBinaryAnd: Expression = { - "&" => bin_op(BOp::And, l, r), - ExprBitShift, -} - -ExprBitShift: Expression = { - "<<" => bin_op(BOp::LeftShift, l, r), - ">>" => 
bin_op(BOp::RightShift, l, r), - ExprSum, -} - -ExprSum: Expression = { - "+" => bin_op(BOp::Add, l, r), - "-" => bin_op(BOp::Sub, l, r), - ExprProduct, -} - -ExprProduct: Expression = { - "*" => bin_op(BOp::Mul, l, r), - "/" => bin_op(BOp::Div, l, r), - "%" => bin_op(BOp::Mod, l, r), - ExprUnary, -} - -ExprUnary: Expression = { - "~" => un_op(UOp::BitwiseNot, <>), - "-" => un_op(UOp::Negation, <>), - "+" , - ExprTerm, -} - -ExprTerm: Expression = { - Number => Expression::Number(<>), - "(" ")" => <>, - "%hi(" ")" => fn_op(FOp::HiDataRef, <>), - "%lo(" ")" => fn_op(FOp::LoDataRef, <>), - Symbol => Expression::Symbol(<>) -} - -StringLiteral: Vec = { - r#""[^\\"\n\r]*(\\[tnfbrx'"\\0-9][^\\"\n\r]*)*""# => unescape_string(<>) -} - -DotlessSymbol: String = { - r"[a-zA-Z_@][a-zA-Z$_0-9.@]*" => <>.to_string() -} - -DottedSymbol: String = { - r"\.[a-zA-Z_@.][a-zA-Z$_0-9.@]*" => <>.to_string() -} - -Symbol: String = { - DotlessSymbol, - DottedSymbol, -} - -Number: i64 = { - r"[0-9][0-9_]*" => u64::from_str(<>).unwrap() as i64, - r"0x[0-9A-Fa-f][0-9A-Fa-f_]*" => u64::from_str_radix(&<>[2..].replace('_', ""), 16).unwrap() as i64, -} diff --git a/riscv/src/code_gen.rs b/riscv/src/code_gen.rs index ef581d9d39..0903d12e0d 100644 --- a/riscv/src/code_gen.rs +++ b/riscv/src/code_gen.rs @@ -1,8 +1,7 @@ use std::{fmt, vec}; use itertools::Itertools; -use powdr_asm_utils::data_storage::SingleDataValue; -use powdr_asm_utils::utils::{escape_label, quote}; +use powdr_isa_utils::{escape_label, quote, SingleDataValue}; use powdr_number::{FieldElement, KnownField}; use crate::continuations::bootloader::{bootloader_and_shutdown_routine, bootloader_preamble}; @@ -27,8 +26,6 @@ impl Register { } } -impl powdr_asm_utils::ast::Register for Register {} - impl fmt::Display for Register { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.value < 32 { @@ -71,23 +68,6 @@ impl From<&str> for Register { } } -#[derive(Clone, Copy, Debug)] -pub enum FunctionKind { - HiDataRef, - 
LoDataRef, -} - -impl powdr_asm_utils::ast::FunctionOpKind for FunctionKind {} - -impl fmt::Display for FunctionKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - FunctionKind::HiDataRef => write!(f, "%hi"), - FunctionKind::LoDataRef => write!(f, "%lo"), - } - } -} - pub enum Statement<'a, L: AsRef, A: InstructionArgs> { DebugLoc { file: u64, line: u64, col: u64 }, Label(L), diff --git a/riscv/src/elf/mod.rs b/riscv/src/elf/mod.rs index b4e688f483..996a0eeb5f 100644 --- a/riscv/src/elf/mod.rs +++ b/riscv/src/elf/mod.rs @@ -13,7 +13,7 @@ use goblin::elf::{ Elf, ProgramHeader, }; use itertools::{Either, Itertools}; -use powdr_asm_utils::data_storage::SingleDataValue; +use powdr_isa_utils::SingleDataValue; use powdr_number::FieldElement; use raki::{ decode::Decode, diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index 10dea2cb26..eea631596e 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -3,20 +3,16 @@ use std::{ borrow::Cow, - collections::BTreeMap, ffi::OsStr, path::{Path, PathBuf}, process::Command, }; -use mktemp::Temp; use powdr_number::FieldElement; -use serde_json::Value as JsonValue; use std::fs; pub use crate::runtime::Runtime; -pub mod asm; mod code_gen; pub mod continuations; pub mod elf; @@ -32,7 +28,6 @@ pub fn compile_rust( output_dir: &Path, force_overwrite: bool, runtime: &Runtime, - via_elf: bool, with_bootloader: bool, features: Option>, ) -> Option<(PathBuf, String)> { @@ -51,52 +46,16 @@ pub fn compile_rust( panic!("input must be a crate directory or `Cargo.toml` file"); }; - if via_elf { - let elf_path = compile_rust_crate_to_riscv_bin(&file_path, output_dir, features); - - compile_riscv_elf::( - file_name, - &elf_path, - output_dir, - force_overwrite, - runtime, - with_bootloader, - ) - } else { - let riscv_asm = compile_rust_crate_to_riscv_asm(&file_path, output_dir, features); - if !output_dir.exists() { - fs::create_dir_all(output_dir).unwrap() - } - for (asm_file_name, contents) in &riscv_asm { - let 
riscv_asm_file_name = output_dir.join(format!( - "{}_riscv_{asm_file_name}.asm", - Path::new(file_path.as_ref()) - .file_stem() - .unwrap() - .to_str() - .unwrap(), - )); - if riscv_asm_file_name.exists() && !force_overwrite { - eprintln!( - "Target file {} already exists. Not overwriting.", - riscv_asm_file_name.to_str().unwrap() - ); - return None; - } - - fs::write(riscv_asm_file_name.clone(), contents).unwrap(); - log::info!("Wrote {}", riscv_asm_file_name.to_str().unwrap()); - } + let elf_path = compile_rust_crate_to_riscv(&file_path, output_dir, features); - compile_riscv_asm_bundle::( - file_name, - riscv_asm, - output_dir, - force_overwrite, - runtime, - with_bootloader, - ) - } + compile_riscv_elf::( + file_name, + &elf_path, + output_dir, + force_overwrite, + runtime, + with_bootloader, + ) } fn compile_program

( @@ -132,25 +91,6 @@ fn compile_program

( Some((powdr_asm_file_name, powdr_asm)) } -pub fn compile_riscv_asm_bundle( - original_file_name: &str, - riscv_asm_files: BTreeMap, - output_dir: &Path, - force_overwrite: bool, - runtime: &Runtime, - with_bootloader: bool, -) -> Option<(PathBuf, String)> { - compile_program::>( - original_file_name, - riscv_asm_files, - output_dir, - force_overwrite, - runtime, - with_bootloader, - asm::compile::, - ) -} - /// Translates a RISC-V ELF file to powdr asm. pub fn compile_riscv_elf( original_file_name: &str, @@ -179,164 +119,98 @@ macro_rules! as_ref [ }; ]; -pub struct CompilationResult { - pub executable: Option, - assemblies: BTreeMap, -} - -impl CompilationResult { - pub fn load_asm_files(self) -> BTreeMap { - self.assemblies - .into_iter() - .map(|(name, filename)| { - let contents = fs::read_to_string(filename).unwrap(); - (name, contents) - }) - .collect() - } -} - pub fn compile_rust_crate_to_riscv( input_dir: &str, output_dir: &Path, features: Option>, -) -> CompilationResult { +) -> PathBuf { const CARGO_TARGET_DIR: &str = "cargo_target"; let target_dir = output_dir.join(CARGO_TARGET_DIR); - let use_std = is_std_enabled_in_runtime(input_dir); + let metadata = CargoMetadata::from_input_dir(input_dir); - // We call cargo twice, once to perform the actual building, and once to get - // the build plan json, so we know exactly which object files to use. - - // Real build run. - let build_status = - build_cargo_command(input_dir, &target_dir, use_std, features.clone(), false) - .status() - .unwrap(); + // Run build. + let build_status = build_cargo_command( + input_dir, + &target_dir, + metadata.use_std, + features.clone(), + false, + ) + .status() + .unwrap(); assert!(build_status.success()); - // Build plan run. We must set the target dir to a temporary directory, - // otherwise cargo will screw up the build done previously. 
- let (build_plan, plan_dir): (JsonValue, PathBuf) = { - let plan_dir = Temp::new_dir().unwrap(); - let build_plan_run = build_cargo_command(input_dir, &plan_dir, use_std, features, true) - .output() - .unwrap(); - assert!(build_plan_run.status.success()); - - ( - serde_json::from_slice(&build_plan_run.stdout).unwrap(), - plan_dir.to_path_buf(), - ) - }; - - let mut assemblies = BTreeMap::new(); - - let JsonValue::Array(invocations) = &build_plan["invocations"] else { - panic!("no invocations in cargo build plan"); + let target = if metadata.use_std { + TARGET_STD + } else { + TARGET_NO_STD }; - let mut executable = None; - - log::debug!("RISC-V assembly files of this build:"); - let target = Path::new(if use_std { TARGET_STD } else { TARGET_NO_STD }); - for i in invocations { - let JsonValue::Array(outputs) = &i["outputs"] else { - panic!("no outputs in cargo build plan"); - }; - for output in outputs { - let output = Path::new(output.as_str().unwrap()); - // Replace the plan_dir with the target_dir, because the later is - // where the files actually are. 
- let parent = target_dir.join(output.parent().unwrap().strip_prefix(&plan_dir).unwrap()); - if parent.ends_with(target.join("release/deps")) { - let extension = output.extension(); - let name_stem = if Some(OsStr::new("rmeta")) == extension { - // Have to convert to string to remove the "lib" prefix: - output - .file_stem() - .unwrap() - .to_str() - .unwrap() - .strip_prefix("lib") - .unwrap() - } else if extension.is_none() { - assert!(executable.is_none(), "Multiple executables found"); - let file_stem = output.file_stem().unwrap(); - executable = Some(parent.join(file_stem)); - file_stem.to_str().unwrap() - } else { - continue; - }; - - let mut asm_name = parent.join(name_stem); - asm_name.set_extension("s"); - - log::debug!(" - {}", asm_name.to_string_lossy()); - assert!( - assemblies.insert(name_stem.to_string(), asm_name).is_none(), - "Duplicate assembly file name: {name_stem}", - ); - } - } - } - - CompilationResult { - executable, - assemblies, - } + // TODO: support more than one executable per crate. + assert_eq!(metadata.bins.len(), 1); + target_dir + .join(target) + .join("release") + .join(&metadata.bins[0]) } -pub fn compile_rust_crate_to_riscv_asm( - input_dir: &str, - output_dir: &Path, - features: Option>, -) -> BTreeMap { - compile_rust_crate_to_riscv(input_dir, output_dir, features).load_asm_files() +struct CargoMetadata { + bins: Vec, + use_std: bool, } -pub fn compile_rust_crate_to_riscv_bin( - input_dir: &str, - output_dir: &Path, - features: Option>, -) -> PathBuf { - compile_rust_crate_to_riscv(input_dir, output_dir, features) - .executable - .unwrap() -} +impl CargoMetadata { + fn from_input_dir(input_dir: &str) -> Self { + // Calls `cargo metadata --format-version 1 --no-deps --manifest-path ` to determine + // if the `std` feature is enabled in the dependency crate `powdr-riscv-runtime`. 
+ let metadata = Command::new("cargo") + .args(as_ref![ + OsStr; + "metadata", + "--format-version", + "1", + "--no-deps", + "--manifest-path", + input_dir, + ]) + .output() + .unwrap(); -fn is_std_enabled_in_runtime(input_dir: &str) -> bool { - // Calls `cargo metadata --format-version 1 --no-deps --manifest-path ` to determine - // if the `std` feature is enabled in the dependency crate `powdr-riscv-runtime`. - let metadata = Command::new("cargo") - .args(as_ref![ - OsStr; - "metadata", - "--format-version", - "1", - "--no-deps", - "--manifest-path", - input_dir, - ]) - .output() - .unwrap(); - - let metadata: serde_json::Value = serde_json::from_slice(&metadata.stdout).unwrap(); - let packages = metadata["packages"].as_array().unwrap(); - packages.iter().any(|package| { - package["dependencies"] - .as_array() - .unwrap() + let metadata: serde_json::Value = serde_json::from_slice(&metadata.stdout).unwrap(); + let packages = metadata["packages"].as_array().unwrap(); + + // Is the `std` feature enabled in the `powdr-riscv-runtime` crate? 
+ let use_std = packages.iter().any(|package| { + package["dependencies"] + .as_array() + .unwrap() + .iter() + .any(|dependency| { + dependency["name"] == "powdr-riscv-runtime" + && dependency["features"] + .as_array() + .unwrap() + .contains(&"std".into()) + }) + }); + + let bins = packages .iter() - .any(|dependency| { - dependency["name"] == "powdr-riscv-runtime" - && dependency["features"] - .as_array() - .unwrap() - .contains(&"std".into()) + .filter_map(|package| { + let targets = package["targets"].as_array().unwrap(); + targets.iter().find_map(|target| { + if target["kind"] == "bin" { + Some(target["name"].as_str().unwrap().to_string()) + } else { + None + } + }) }) - }) + .collect(); + + Self { bins, use_std } + } } fn build_cargo_command( diff --git a/riscv/tests/instructions.rs b/riscv/tests/instructions.rs index 5dc811062a..607994362d 100644 --- a/riscv/tests/instructions.rs +++ b/riscv/tests/instructions.rs @@ -3,31 +3,13 @@ mod common; mod instruction_tests { use std::path::Path; - use crate::common::{verify_riscv_asm_file, verify_riscv_asm_string}; - use powdr_number::GoldilocksField; - use powdr_riscv::asm::compile; + use crate::common::verify_riscv_asm_file; use powdr_riscv::Runtime; use test_log::test; - fn run_instruction_test(path: &Path, name: &str) { + fn run_instruction_test(path: &Path) { // Test from ELF path: verify_riscv_asm_file(path, &Runtime::base(), false); - - if name == "rvc" { - // "rvc" test is not supported via assembly path - return; - } - - // Test from assembly path: - // TODO Should we create one powdr-asm from all tests or keep them separate? 
- let assembly = std::fs::read_to_string(path).unwrap(); - let powdr_asm = compile::( - [(name.to_string(), assembly)].into(), - &Runtime::base(), - false, - ); - - verify_riscv_asm_string::<()>(&format!("{name}.asm"), &powdr_asm, Default::default(), None); } include!(concat!(env!("OUT_DIR"), "/instruction_tests.rs")); diff --git a/riscv/tests/riscv.rs b/riscv/tests/riscv.rs index 5066b8d1ed..14dec1a6b6 100644 --- a/riscv/tests/riscv.rs +++ b/riscv/tests/riscv.rs @@ -22,23 +22,14 @@ pub fn test_continuations(case: &str) { let runtime = Runtime::base().with_poseidon_for_continuations(); let temp_dir = Temp::new_dir().unwrap(); - let compiled = powdr_riscv::compile_rust_crate_to_riscv( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, None, ); // Test continuations from ELF file. - let powdr_asm = powdr_riscv::elf::translate::( - compiled.executable.as_ref().unwrap(), - &runtime, - true, - ); - run_continuations_test(case, powdr_asm); - - // Test continuations from assembly files. 
- let powdr_asm = - powdr_riscv::asm::compile::(compiled.load_asm_files(), &runtime, true); + let powdr_asm = powdr_riscv::elf::translate::(&executable, &runtime, true); run_continuations_test(case, powdr_asm); } @@ -67,7 +58,7 @@ fn bn254_sanity_check() { let case = "trivial"; let temp_dir = Temp::new_dir().unwrap(); - let compiled = powdr_riscv::compile_rust_crate_to_riscv( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, None, @@ -75,11 +66,7 @@ fn bn254_sanity_check() { log::info!("Verifying {case} converted from ELF file"); let runtime = Runtime::base(); - let from_elf = powdr_riscv::elf::translate::( - compiled.executable.as_ref().unwrap(), - &runtime, - false, - ); + let from_elf = powdr_riscv::elf::translate::(&executable, &runtime, false); let temp_dir = mktemp::Temp::new_dir().unwrap().release(); @@ -236,8 +223,7 @@ fn password() { #[ignore = "Too slow"] fn std_hello_world() { let case = "std_hello_world"; - // We only test via ELF because std is not supported via assembly. 
- verify_riscv_crate_impl::<()>(case, vec![], &Runtime::base(), true, false, None); + verify_riscv_crate(case, vec![], &Runtime::base()); } #[test] @@ -318,12 +304,12 @@ fn read_slice() { let case = "read_slice"; let runtime = Runtime::base(); let temp_dir = Temp::new_dir().unwrap(); - let riscv_asm = powdr_riscv::compile_rust_crate_to_riscv_asm( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, None, ); - let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, false); + let powdr_asm = powdr_riscv::elf::translate::(&executable, &runtime, false); let data: Vec = vec![]; let answer = data.iter().sum::(); @@ -406,18 +392,15 @@ fn features() { // no features let expected = 0; - let compiled = powdr_riscv::compile_rust_crate_to_riscv( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, None, ); log::info!("Verifying {case} converted from ELF file"); - let from_elf = powdr_riscv::elf::translate::( - compiled.executable.as_ref().unwrap(), - &Runtime::base(), - false, - ); + let from_elf = + powdr_riscv::elf::translate::(&executable, &Runtime::base(), false); verify_riscv_asm_string::( &format!("{case}_from_elf.asm"), &from_elf, @@ -427,18 +410,15 @@ fn features() { // "add_two" let expected = 2; - let compiled = powdr_riscv::compile_rust_crate_to_riscv( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, Some(vec!["add_two".to_string()]), ); log::info!("Verifying {case} converted from ELF file"); - let from_elf = powdr_riscv::elf::translate::( - compiled.executable.as_ref().unwrap(), - &Runtime::base(), - false, - ); + let from_elf = + powdr_riscv::elf::translate::(&executable, &Runtime::base(), false); verify_riscv_asm_string::( &format!("{case}_from_elf.asm"), &from_elf, @@ -448,18 +428,15 @@ fn features() { // "add_two" and "add_three" let expected = 5; - 
let compiled = powdr_riscv::compile_rust_crate_to_riscv( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, Some(vec!["add_two".to_string(), "add_three".to_string()]), ); log::info!("Verifying {case} converted from ELF file"); - let from_elf = powdr_riscv::elf::translate::( - compiled.executable.as_ref().unwrap(), - &Runtime::base(), - false, - ); + let from_elf = + powdr_riscv::elf::translate::(&executable, &Runtime::base(), false); verify_riscv_asm_string::( &format!("{case}_from_elf.asm"), &from_elf, @@ -477,12 +454,12 @@ fn many_chunks_dry() { let case = "many_chunks"; let runtime = Runtime::base().with_poseidon_for_continuations(); let temp_dir = Temp::new_dir().unwrap(); - let riscv_asm = powdr_riscv::compile_rust_crate_to_riscv_asm( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, None, ); - let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, true); + let powdr_asm = powdr_riscv::elf::translate::(&executable, &runtime, true); let mut pipeline = Pipeline::default() .from_asm_string(powdr_asm, Some(PathBuf::from(case))) @@ -504,12 +481,12 @@ fn output_syscall() { let case = "output"; let runtime = Runtime::base(); let temp_dir = Temp::new_dir().unwrap(); - let riscv_asm = powdr_riscv::compile_rust_crate_to_riscv_asm( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, None, ); - let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, false); + let powdr_asm = powdr_riscv::elf::translate::(&executable, &runtime, false); let inputs = vec![1u32, 2, 3] .into_iter() @@ -547,7 +524,7 @@ fn many_chunks_memory() { } fn verify_riscv_crate(case: &str, inputs: Vec, runtime: &Runtime) { - verify_riscv_crate_impl::<()>(case, inputs, runtime, true, true, None) + verify_riscv_crate_impl::<()>(case, inputs, runtime, None) } fn 
verify_riscv_crate_with_data( @@ -556,48 +533,28 @@ fn verify_riscv_crate_with_data( runtime: &Runtime, data: Vec<(u32, S)>, ) { - verify_riscv_crate_impl(case, inputs, runtime, true, true, Some(data)) + verify_riscv_crate_impl(case, inputs, runtime, Some(data)) } fn verify_riscv_crate_impl( case: &str, inputs: Vec, runtime: &Runtime, - via_elf: bool, - via_asm: bool, data: Option>, ) { let temp_dir = Temp::new_dir().unwrap(); - let compiled = powdr_riscv::compile_rust_crate_to_riscv( + let executable = powdr_riscv::compile_rust_crate_to_riscv( &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, None, ); - if via_elf { - log::info!("Verifying {case} converted from ELF file"); - let from_elf = powdr_riscv::elf::translate::( - compiled.executable.as_ref().unwrap(), - runtime, - false, - ); - verify_riscv_asm_string( - &format!("{case}_from_elf.asm"), - &from_elf, - &inputs, - data.as_deref(), - ); - } - - if via_asm { - log::info!("Verifying {case} converted from assembly files"); - let from_asm = - powdr_riscv::asm::compile::(compiled.load_asm_files(), runtime, false); - verify_riscv_asm_string( - &format!("{case}_from_asm.asm"), - &from_asm, - &inputs, - data.as_deref(), - ); - } + log::info!("Verifying {case}"); + let from_elf = powdr_riscv::elf::translate::(&executable, runtime, false); + verify_riscv_asm_string( + &format!("{case}_from_elf.asm"), + &from_elf, + &inputs, + data.as_deref(), + ); } From a7446def56baaab3fe19585485d138c79a5f2e55 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 30 Sep 2024 14:50:01 +0100 Subject: [PATCH 2/6] Fix instruction tests build.rs. 
--- riscv/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/build.rs b/riscv/build.rs index 56ff84f179..7f7a33002f 100644 --- a/riscv/build.rs +++ b/riscv/build.rs @@ -42,7 +42,7 @@ fn build_instruction_tests() { #[test] #[ignore = "Too slow"] fn {file_name}() {{ - run_instruction_test(Path::new(r#"{file}"#), r#"{file_name}"#); + run_instruction_test(Path::new(r#"{file}"#)); }} "##, file = file.path().to_str().unwrap(), From 0c79dd80510ac735b1416372ddf36c784c276e52 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 30 Sep 2024 14:53:48 +0100 Subject: [PATCH 3/6] Removing stale dependencies. --- riscv/Cargo.toml | 9 +-------- riscv/build.rs | 8 -------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/riscv/Cargo.toml b/riscv/Cargo.toml index 9d5a611ab0..b8fa1ab6ea 100644 --- a/riscv/Cargo.toml +++ b/riscv/Cargo.toml @@ -29,16 +29,8 @@ gimli = "0.31" goblin = "0.8" lazy_static = "1.4.0" itertools = "0.13" -lalrpop-util = { version = "^0.19", features = ["lexer"] } log = "0.4.17" -mktemp = "0.5.0" -num-traits = "0.2.15" raki = "0.1.4" -# This is only here to work around https://github.com/lalrpop/lalrpop/issues/750 -# It should be removed once that workaround is no longer needed. 
-regex-syntax = { version = "0.6", default-features = false, features = [ - "unicode", -] } serde_json = "1.0" thiserror = "1.0" static_assertions = "1.1.0" @@ -50,6 +42,7 @@ lalrpop = "^0.19" powdr-number.workspace = true powdr-backend.workspace = true +mktemp = "0.5.0" test-log = "0.2.12" env_logger = "0.10.0" hex = "0.4.3" diff --git a/riscv/build.rs b/riscv/build.rs index 7f7a33002f..663e9f29d9 100644 --- a/riscv/build.rs +++ b/riscv/build.rs @@ -8,17 +8,9 @@ use std::path::Path; extern crate lalrpop; fn main() { - build_lalrpop(); build_instruction_tests(); } -fn build_lalrpop() { - lalrpop::Configuration::new() - .emit_rerun_directives(true) - .process_current_dir() - .unwrap(); -} - #[allow(clippy::print_stdout)] fn build_instruction_tests() { let out_dir = env::var("OUT_DIR").unwrap(); From 0511024e69f8badee14a6433897815562660ac65 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 30 Sep 2024 15:47:01 +0100 Subject: [PATCH 4/6] Fixing identification of binaries. --- riscv/src/lib.rs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index eea631596e..919046a69a 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -197,15 +197,20 @@ impl CargoMetadata { let bins = packages .iter() - .filter_map(|package| { - let targets = package["targets"].as_array().unwrap(); - targets.iter().find_map(|target| { - if target["kind"] == "bin" { - Some(target["name"].as_str().unwrap().to_string()) - } else { - None - } - }) + .flat_map(|package| { + package["targets"] + .as_array() + .unwrap() + .iter() + .filter_map(|target| { + target["kind"].as_array().and_then(|kind| { + if kind.contains(&"bin".into()) { + Some(target["name"].as_str().unwrap().to_string()) + } else { + None + } + }) + }) }) .collect(); From 907ac5894b0468a32ed92e484ece56cd3c2d2a8b Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 30 Sep 2024 16:17:22 +0100 Subject: [PATCH 5/6] read-slices as executable 
--- riscv/tests/riscv_data/read_slice/src/{lib.rs => main.rs} | 1 + 1 file changed, 1 insertion(+) rename riscv/tests/riscv_data/read_slice/src/{lib.rs => main.rs} (96%) diff --git a/riscv/tests/riscv_data/read_slice/src/lib.rs b/riscv/tests/riscv_data/read_slice/src/main.rs similarity index 96% rename from riscv/tests/riscv_data/read_slice/src/lib.rs rename to riscv/tests/riscv_data/read_slice/src/main.rs index b39b268e60..6f57f3eb18 100644 --- a/riscv/tests/riscv_data/read_slice/src/lib.rs +++ b/riscv/tests/riscv_data/read_slice/src/main.rs @@ -1,3 +1,4 @@ +#![no_main] #![no_std] extern crate alloc; From 7c67bd7ab35b69b7ac7f8acc48ff172210342953 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Mon, 30 Sep 2024 16:29:03 +0100 Subject: [PATCH 6/6] Removing unneeded stuff from the build command. --- .github/workflows/nightly-tests.yml | 4 ++- .github/workflows/pr-tests.yml | 12 +++++-- riscv/src/lib.rs | 53 +++++------------------------ 3 files changed, 20 insertions(+), 49 deletions(-) diff --git a/.github/workflows/nightly-tests.yml b/.github/workflows/nightly-tests.yml index fdaf20f48a..7729e4ad5b 100644 --- a/.github/workflows/nightly-tests.yml +++ b/.github/workflows/nightly-tests.yml @@ -69,8 +69,10 @@ jobs: run: rustup toolchain install 1.81-x86_64-unknown-linux-gnu - name: Install nightly run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install stdlib + - name: Install std source run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - name: Install EStarkPolygon prover dependencies run: sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm - name: Install pilcom diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index f9f9589e07..59bc48e2ea 100644 --- a/.github/workflows/pr-tests.yml +++ 
b/.github/workflows/pr-tests.yml @@ -91,8 +91,10 @@ jobs: run: rustup toolchain install 1.81-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain 1.81-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain 1.81-x86_64-unknown-linux-gnu - name: Install nightly run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install stdlib + - name: Install std source run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - name: Install pilcom run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - uses: taiki-e/install-action@nextest @@ -148,8 +150,10 @@ jobs: run: rustup toolchain install 1.81-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain 1.81-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain 1.81-x86_64-unknown-linux-gnu - name: Install nightly run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install stdlib + - name: Install std source run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - name: Install pilcom run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - name: Install EStarkPolygon prover system dependency @@ -198,8 +202,10 @@ jobs: run: sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Install nightly run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install stdlib + - name: Install std source run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add 
riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - name: Install pilcom run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - uses: taiki-e/install-action@nextest diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index 919046a69a..e146bccc43 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -130,15 +130,10 @@ pub fn compile_rust_crate_to_riscv( let metadata = CargoMetadata::from_input_dir(input_dir); // Run build. - let build_status = build_cargo_command( - input_dir, - &target_dir, - metadata.use_std, - features.clone(), - false, - ) - .status() - .unwrap(); + let build_status = + build_cargo_command(input_dir, &target_dir, metadata.use_std, features.clone()) + .status() + .unwrap(); assert!(build_status.success()); let target = if metadata.use_std { @@ -223,22 +218,18 @@ fn build_cargo_command( target_dir: &Path, use_std: bool, features: Option>, - produce_build_plan: bool, ) -> Command { /* The explanation for the more exotic options we are using to build the user code: - `--emit=asm`: tells rustc to emit the assembly code of the program. This is the - actual input for the Powdr assembly translator. This is not needed in ELF path. - `-C link-arg=-Tpowdr.x`: tells the linker to use the `powdr.x` linker script, provided by `powdr-riscv-runtime` crate. It configures things like memory layout - of the program and the entry point function. This is not needed in ASM path. + of the program and the entry point function. `-C link-arg=--emit-relocs`: this is a requirement from Powdr ELF translator, it tells the linker to leave in the final executable the linkage relocation tables. The ELF translator uses this information to lift references to text address into - labels in the Powdr assembly. This is not needed in ASM path. + labels in the Powdr assembly. `-C passes=loweratomic`: risc0 target does not support atomic instructions. 
When they are needed, LLVM makes calls to software emulation functions it expects to @@ -259,22 +250,12 @@ fn build_cargo_command( `memcmp`, etc, for systems that doesn't already have them, like ours, as LLVM assumes these functions to be available. We also use `compiler_builtins` for `#[no_std]` programs, but in there it is enabled by default. - - `-Zbuild-std=core,alloc`: while there are pre-packaged builds of `core` and - `alloc` for riscv32imac target, we still need their assembly files generated - during compilation to translate via ASM path, so we explicitly build them. - - `-Zunstable-options --build-plan`: the build plan is a cargo unstable feature - that outputs a JSON with all the information about the build, which include the - paths of the object files generated. We use this build plan to find the assembly - files generated by the build, needed in the ASM path, and to find the executable - ELF file, needed in the ELF path. */ let mut cmd = Command::new("cargo"); cmd.env( "RUSTFLAGS", - "--emit=asm -g -C link-arg=-Tpowdr.x -C link-arg=--emit-relocs -C passes=lower-atomic -C panic=abort", + "-g -C link-arg=-Tpowdr.x -C link-arg=--emit-relocs -C passes=lower-atomic -C panic=abort", ); let mut args: Vec<&OsStr> = as_ref![ @@ -299,15 +280,7 @@ fn build_cargo_command( "-Zbuild-std-features=default,compiler-builtins-mem", ]); } else { - args.extend(as_ref![ - OsStr; - TARGET_NO_STD, - // TODO: the following switch can be removed once we drop support to - // asm path, but the following command will have to be added to CI: - // - // rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - "-Zbuild-std=core,alloc" - ]); + args.push(OsStr::new(TARGET_NO_STD)); }; // we can't do this inside the if because we need to keep a reference to the string @@ -319,16 +292,6 @@ fn build_cargo_command( } } - // TODO: if asm path is removed, there are better ways to find the - // executable name than relying on the unstable build 
plan. - if produce_build_plan { - args.extend(as_ref![ - OsStr; - "-Zunstable-options", - "--build-plan" - ]); - } - cmd.args(args); cmd }