Skip to content
This repository has been archived by the owner on Aug 15, 2024. It is now read-only.

perf/arc slice polys #42

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ lazy_static = "*"
arrayvec = "0.7"
const_format = "0.2"
bincode = "*"
ecow = "*"
ethereum-types = "=0.14.1"
cs_derive = { path = "./cs_derive" }
itertools = "0.10"
Expand Down
2 changes: 1 addition & 1 deletion rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[toolchain]
channel = "nightly-2023-06-25"
channel = "nightly-2023-08-01"
48 changes: 33 additions & 15 deletions src/cs/implementations/copy_permutation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use crate::{
};
use crate::{cs::traits::GoodAllocator, field::PrimeField};

use std::sync::Arc;

pub fn num_intermediate_partial_product_relations(
num_copys_under_copy_permutation: usize,
quotient_degree: usize,
Expand Down Expand Up @@ -60,10 +62,11 @@ pub(crate) fn pointwise_rational<
{
let non_residue = P::constant(*non_residue, ctx);
worker.scope(typical_size, |scope, chunk_size| {
let basis_storage = unsafe { Arc::get_mut_unchecked(&mut basis.storage) };
for (((w, sigma), x_poly), dst) in (witness_poly.storage.chunks(chunk_size))
.zip(sigma_poly.storage.chunks(chunk_size))
.zip(precomputed_x_poly.storage.chunks(chunk_size))
.zip(basis.storage.chunks_mut(chunk_size))
.zip(basis_storage.chunks_mut(chunk_size))
{
let mut ctx = *ctx;
scope.spawn(move |_| {
Expand Down Expand Up @@ -149,12 +152,16 @@ pub(crate) fn pointwise_rational_in_extension<
{
let non_residue = P::constant(*non_residue, ctx);
worker.scope(typical_size, |scope, chunk_size| {
let (basis_c0_storage, basis_c1_storage) = unsafe {(
Arc::get_mut_unchecked(&mut basis_c0.storage),
Arc::get_mut_unchecked(&mut basis_c1.storage),
)};
for ((((w, sigma), x_poly), dst_c0), dst_c1) in
(witness_poly.storage.chunks(chunk_size))
.zip(sigma_poly.storage.chunks(chunk_size))
.zip(precomputed_x_poly.storage.chunks(chunk_size))
.zip(basis_c0.storage.chunks_mut(chunk_size))
.zip(basis_c1.storage.chunks_mut(chunk_size))
.zip(basis_c0_storage.chunks_mut(chunk_size))
.zip(basis_c1_storage.chunks_mut(chunk_size))
{
let mut ctx = *ctx;
scope.spawn(move |_| {
Expand Down Expand Up @@ -281,8 +288,9 @@ pub(crate) fn pointwise_product_into<

for source in inputs.iter() {
worker.scope(typical_size, |scope, chunk_size| {
let into_storage = unsafe { Arc::get_mut_unchecked(&mut into.storage)};
for (dst, src) in
(into.storage.chunks_mut(chunk_size)).zip(source.storage.chunks(chunk_size))
(into_storage.chunks_mut(chunk_size)).zip(source.storage.chunks(chunk_size))
{
let mut ctx = *ctx;
scope.spawn(move |_| {
Expand Down Expand Up @@ -336,13 +344,23 @@ pub(crate) fn pointwise_product_in_extension_into<
) {
let typical_size = into_c0.storage.len(); // we need raw length in counts of P

let (into_c0_storage, into_c1_storage) = unsafe {(
Arc::get_mut_unchecked(&mut into_c0.storage),
Arc::get_mut_unchecked(&mut into_c1.storage),
)};

// 18: 0x5617ec14977f - boojum::cs::implementations::copy_permutation::pointwise_product_in_extension_into::h7afc923f8b42188e
// 19: 0x5617ec14a7b6 - boojum::cs::implementations::copy_permutation::compute_partial_products_in_extension::h2e6bd4b1c53ed759
// 20: 0x5617ebcb9319 - boojum::cs::implementations::prover::<impl boojum::cs::implementations::reference_cs::CSReferenceAssembly<F,P,CFG,A>>::prove_cpu_basic::h088688d6cd7fed3a
// 21: 0x5617ebbdff22 - boojum::cs::implementations::convenience::<impl boojum::cs::implementations::reference_cs::CSReferenceAssembly<F,P,CFG,A>>::prove_from_precomputations::hb5faa7c379034a0a
// 22: 0x5617ebeecd3a - zkevm_test_harness::prover_utils::prove_base_layer_circuit::h6ca12e453ce37157

for source in inputs.iter() {
let [src_c0, src_c1] = source;
worker.scope(typical_size, |scope, chunk_size| {
for (((dst_c0, dst_c1), src_c0), src_c1) in into_c0
.storage
for (((dst_c0, dst_c1), src_c0), src_c1) in into_c0_storage
.chunks_mut(chunk_size)
.zip(into_c1.storage.chunks_mut(chunk_size))
.zip(into_c1_storage.chunks_mut(chunk_size))
.zip(src_c0.storage.chunks(chunk_size))
.zip(src_c1.storage.chunks(chunk_size))
{
Expand Down Expand Up @@ -625,8 +643,7 @@ pub(crate) fn compute_partial_products<

// we have to apply pointwise products on top of Z(x)

for el in partial_elementwise_products.into_iter() {
let mut el = el;
for mut el in partial_elementwise_products.into_iter() {
pointwise_product_into(&previous, &mut el, worker, ctx);

// we have new pointwise in el, and untouched previous, so we can reuse the storage
Expand Down Expand Up @@ -756,8 +773,7 @@ pub(crate) fn compute_partial_products_in_extension<

// we have to apply pointwise products on top of Z(x)

for el in partial_elementwise_products.into_iter() {
let [mut el_c0, mut el_c1] = el;
for [mut el_c0, mut el_c1] in partial_elementwise_products.into_iter() {
pointwise_product_in_extension_into::<F, P, EXT, A>(
&previous, &mut el_c0, &mut el_c1, worker, ctx,
);
Expand Down Expand Up @@ -1232,12 +1248,14 @@ pub(crate) fn compute_quotient_terms_in_extension<
}
}

unsafe { std::sync::Arc::get_mut_unchecked(&mut dst_c0.storage[outer]) }
.storage[inner]
unsafe {
Arc::get_mut_unchecked(&mut dst_c0.storage[outer].storage)[inner]
}
.add_assign(&contribution_c0, &mut ctx);

unsafe { std::sync::Arc::get_mut_unchecked(&mut dst_c1.storage[outer]) }
.storage[inner]
unsafe {
Arc::get_mut_unchecked(&mut dst_c1.storage[outer].storage)[inner]
}
.add_assign(&contribution_c1, &mut ctx);

iterator.advance();
Expand Down
45 changes: 45 additions & 0 deletions src/cs/implementations/fast_serialization.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use core::slice;
use ecow::EcoVec;
use std::{
alloc::Allocator,
error::Error,
Expand Down Expand Up @@ -135,6 +136,50 @@ impl<T: MemcopySerializable> MemcopySerializable for std::sync::Arc<T> {
}
}

// FIXME: write a more optimized version :)
impl<
F: SmallField,
P: crate::field::traits::field_like::PrimeFieldLikeVectorized<Base = F>,
> MemcopySerializable for EcoVec<P>
{
fn write_into_buffer<W: Write>(&self, dst: W) -> Result<(), Box<dyn Error>> {
let mut as_vec = Vec::with_capacity(self.len());
as_vec.extend_from_slice(self.as_slice());
MemcopySerializable::write_into_buffer(&as_vec, dst)?;

Ok(())
}

fn read_from_buffer<R: Read>(src: R) -> Result<Self, Box<dyn Error>> {
let vec: Vec<P> = MemcopySerializable::read_from_buffer(src)?;
Ok(vec.as_slice().into())
}
}

// Serialization of a shared slice `Arc<[P], A>` by round-tripping through a
// vector and reusing the `Vec` implementations of `MemcopySerializable`.
impl<
        F: SmallField,
        P: crate::field::traits::field_like::PrimeFieldLikeVectorized<Base = F>,
        A: GoodAllocator + 'static,
    > MemcopySerializable for std::sync::Arc<[P], A>
where
    Vec<P, A>: MemcopySerializable,
{
    // Copies the slice into an owned `Vec<P>` and writes that vector to `dst`.
    fn write_into_buffer<W: Write>(&self, dst: W) -> Result<(), Box<dyn Error>> {
        let owned: Vec<P> = self.to_vec();
        MemcopySerializable::write_into_buffer(&owned, dst)
    }

    // Reads a `Vec<P, A>` from `src`, then copies its elements into a newly
    // allocated `Arc<[P], A>` of the same length.
    fn read_from_buffer<R: Read>(src: R) -> Result<Self, Box<dyn Error>> {
        let vec: Vec<P, A> = MemcopySerializable::read_from_buffer(src)?;

        let mut uninit: std::sync::Arc<[std::mem::MaybeUninit<P>], A> =
            std::sync::Arc::new_uninit_slice_in(vec.len(), A::default());

        // The `Arc` was created on the previous line and has not been cloned or
        // downgraded, so the safe `get_mut` always succeeds here; no need for
        // the unchecked variant.
        let slots = std::sync::Arc::get_mut(&mut uninit)
            .expect("freshly allocated Arc must be uniquely owned");
        std::mem::MaybeUninit::write_slice(slots, vec.as_slice());

        // SAFETY: the allocation holds exactly `vec.len()` slots and
        // `write_slice` (which panics on a length mismatch) has just
        // initialized every one of them from `vec`.
        Ok(unsafe { uninit.assume_init() })
    }
}

// Prime field like vectors are the special case, and it's only implemented for vector!

impl<
Expand Down
7 changes: 4 additions & 3 deletions src/cs/implementations/lookup_argument.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ pub(crate) fn compute_lookup_poly_pairs_over_general_purpose_columns<
let mut selector: GenericPolynomial<F, LagrangeForm, P, A> = (*constant_polys[0]).clone();
if lookup_selector_path[0] == false {
worker.scope(selector.storage.len(), |scope, chunk_size| {
for dst in selector.storage.chunks_mut(chunk_size) {
let selector_storage = unsafe { std::sync::Arc::get_mut_unchecked(&mut selector.storage) };
for dst in selector_storage.chunks_mut(chunk_size) {
let mut ctx = *ctx;
scope.spawn(move |_| {
// inverse
Expand All @@ -87,8 +88,8 @@ pub(crate) fn compute_lookup_poly_pairs_over_general_purpose_columns<
.zip(constant_polys[1..].iter())
{
worker.scope(selector.storage.len(), |scope, chunk_size| {
for (dst, src) in selector
.storage
let selector_storage = unsafe { std::sync::Arc::get_mut_unchecked(&mut selector.storage) };
for (dst, src) in selector_storage
.chunks_mut(chunk_size)
.zip(src.storage.chunks(chunk_size))
{
Expand Down
44 changes: 20 additions & 24 deletions src/cs/implementations/lookup_argument_in_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1082,20 +1082,16 @@ pub(crate) fn compute_quotient_terms_for_lookup_specialized<
// so we just add

unsafe {
std::sync::Arc::get_mut_unchecked(
&mut aggregated_lookup_columns_c0.storage[outer],
)
.storage[inner]
.add_assign(&tmp_c0, &mut ctx);
};
std::sync::Arc::get_mut_unchecked(&mut aggregated_lookup_columns_c0.storage[outer].storage)
[inner]
}
.add_assign(&tmp_c0, &mut ctx);

unsafe {
std::sync::Arc::get_mut_unchecked(
&mut aggregated_lookup_columns_c1.storage[outer],
)
.storage[inner]
.add_assign(&tmp_c1, &mut ctx);
};
std::sync::Arc::get_mut_unchecked(&mut aggregated_lookup_columns_c1.storage[outer].storage)
[inner]
}
.add_assign(&tmp_c1, &mut ctx);

lde_iter.advance();
}
Expand Down Expand Up @@ -1211,15 +1207,15 @@ pub(crate) fn compute_quotient_terms_for_lookup_specialized<

// add into accumulator
unsafe {
std::sync::Arc::get_mut_unchecked(&mut dst_c0.storage[outer]).storage
std::sync::Arc::get_mut_unchecked(&mut dst_c0.storage[outer].storage)
[inner]
.add_assign(&tmp_c0, &mut ctx);
};
}
.add_assign(&tmp_c0, &mut ctx);
unsafe {
std::sync::Arc::get_mut_unchecked(&mut dst_c1.storage[outer]).storage
std::sync::Arc::get_mut_unchecked(&mut dst_c1.storage[outer].storage)
[inner]
.add_assign(&tmp_c1, &mut ctx);
};
}
.add_assign(&tmp_c1, &mut ctx);

lde_iter.advance();
}
Expand Down Expand Up @@ -1300,15 +1296,15 @@ pub(crate) fn compute_quotient_terms_for_lookup_specialized<

// add into accumulator
unsafe {
std::sync::Arc::get_mut_unchecked(&mut dst_c0.storage[outer]).storage
std::sync::Arc::get_mut_unchecked(&mut dst_c0.storage[outer].storage)
[inner]
.add_assign(&tmp_c0, &mut ctx);
};
}
.add_assign(&tmp_c0, &mut ctx);
unsafe {
std::sync::Arc::get_mut_unchecked(&mut dst_c1.storage[outer]).storage
std::sync::Arc::get_mut_unchecked(&mut dst_c1.storage[outer].storage)
[inner]
.add_assign(&tmp_c1, &mut ctx);
};
}
.add_assign(&tmp_c1, &mut ctx);

lde_iter.advance();
}
Expand Down
Loading