From aab4e2066c5653e00b6606986e676580ab57b830 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Tue, 4 Jun 2024 17:19:48 -0700 Subject: [PATCH 01/72] flat to trie layout migration --- das/local_file_storage_service.go | 364 ++++++++++++++++++++++++++++-- 1 file changed, 351 insertions(+), 13 deletions(-) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 8be03bcb30..61545b9d8d 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -6,10 +6,13 @@ package das import ( "bytes" "context" - "encoding/base32" "errors" "fmt" + "io" "os" + "path" + "path/filepath" + "regexp" "time" "github.com/ethereum/go-ethereum/common" @@ -37,30 +40,31 @@ func LocalFileStorageConfigAddOptions(prefix string, f *flag.FlagSet) { type LocalFileStorageService struct { dataDir string + + legacyLayout flatLayout + layout trieLayout } func NewLocalFileStorageService(dataDir string) (StorageService, error) { if unix.Access(dataDir, unix.W_OK|unix.R_OK) != nil { return nil, fmt.Errorf("couldn't start LocalFileStorageService, directory '%s' must be readable and writeable", dataDir) } - return &LocalFileStorageService{dataDir: dataDir}, nil + return &LocalFileStorageService{ + dataDir: dataDir, + legacyLayout: flatLayout{root: dataDir}, + layout: trieLayout{root: dataDir}, + }, nil } func (s *LocalFileStorageService) GetByHash(ctx context.Context, key common.Hash) ([]byte, error) { log.Trace("das.LocalFileStorageService.GetByHash", "key", pretty.PrettyHash(key), "this", s) - pathname := s.dataDir + "/" + EncodeStorageServiceKey(key) - data, err := os.ReadFile(pathname) + batchPath := s.legacyLayout.batchPath(key) + data, err := os.ReadFile(batchPath) if err != nil { - // Just for backward compatability. - pathname = s.dataDir + "/" + base32.StdEncoding.EncodeToString(key.Bytes()) - data, err = os.ReadFile(pathname) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return nil, ErrNotFound - } - return nil, err + if errors.Is(err, os.ErrNotExist) { + return nil, ErrNotFound } - return data, nil + return nil, err } return data, nil } @@ -123,3 +127,337 @@ func (s *LocalFileStorageService) HealthCheck(ctx context.Context) error { } return nil } + +/* +New layout + Access data by hash -> by-data-hash/1st octet/2nd octet/hash + Store data with hash and expiry + Iterate Unordered + Iterate by Time + Prune before + + +Old layout + Access data by hash -> hash + Iterate unordered +*/ + +func listDir(dir string) ([]string, error) { + d, err := os.Open(dir) + if err != nil { + return nil, err + } + defer d.Close() + + // Read all the directory entries + files, err := d.Readdir(-1) + if err != nil { + return nil, err + } + + var fileNames []string + for _, file := range files { + fileNames = append(fileNames, file.Name()) + } + return fileNames, nil +} + +var hex64Regex = regexp.MustCompile(fmt.Sprintf("^[a-fA-F0-9]{%d}$", common.HashLength*2)) + +func isStorageServiceKey(key string) bool { + return hex64Regex.MatchString(key) +} + +// Copies a file by its contents to a new file, making any directories needed +// in the new file's path. 
+func copyFile(new, orig string) error { + err := os.MkdirAll(path.Dir(new), 0o700) + if err != nil { + return fmt.Errorf("failed to create directory %s: %w", path.Dir(new), err) + } + + origFile, err := os.Open(orig) + if err != nil { + return fmt.Errorf("failed to open source file: %w", err) + } + defer origFile.Close() + + newFile, err := os.Create(new) + if err != nil { + return fmt.Errorf("failed to create destination file: %w", err) + } + defer newFile.Close() + + _, err = io.Copy(newFile, origFile) + if err != nil { + return fmt.Errorf("failed to copy contents: %w", err) + } + + return nil +} + +// Creates an empty file, making any directories needed in the new file's path. +func createEmptyFile(new string) error { + err := os.MkdirAll(path.Dir(new), 0o700) + if err != nil { + return fmt.Errorf("failed to create directory %s: %w", path.Dir(new), err) + } + + file, err := os.OpenFile(new, os.O_CREATE|os.O_WRONLY, 0o600) + if err != nil { + return fmt.Errorf("failed to create file %s: %w", new, err) + } + file.Close() + return nil +} + +func migrate(fl flatLayout, tl trieLayout) error { + flIt, err := fl.iterateBatches() + if err != nil { + return err + } + + if !tl.migrating { + return errors.New("LocalFileStorage already migrated to trieLayout") + } + + migrationStart := time.Now() + + err = func() error { + for batch, found, err := flIt.next(); found; batch, found, err = flIt.next() { + if err != nil { + return err + } + + if tl.expiryEnabled && batch.expiry.Before(migrationStart) { + continue // don't migrate expired batches + } + + origPath := fl.batchPath(batch.key) + newPath := tl.batchPath(batch.key) + if err = copyFile(newPath, origPath); err != nil { + return err + } + + if tl.expiryEnabled { + expiryPath := tl.expiryPath(batch.key, uint64(batch.expiry.Unix())) + createEmptyFile(expiryPath) + } + } + + return tl.commitMigration() + }() + if err != nil { + return fmt.Errorf("error migrating local file store layout, retaining old layout: %w", err) + } + + func() { + for batch, found, err := flIt.next(); found; batch, found, err = flIt.next() { + if err != nil { + log.Warn("local file store migration completed, but error cleaning up old layout, files from that layout are now orphaned", "error", err) + return + } + toRemove := fl.batchPath(batch.key) + err = os.Remove(toRemove) + if err != nil { + log.Warn("local file store migration completed, but error cleaning up file from old layout, file is now orphaned", "file", toRemove, "error", err) + } + } + }() + + return nil +} + +type batchIterator interface { + next() (batchIdentifier, bool, error) +} + +type batchIdentifier struct { + key common.Hash + expiry time.Time +} + +const ( + defaultRetention = time.Hour * 24 * 7 * 3 +) + +type flatLayout struct { + root string + + retention time.Duration +} + +type flatLayoutIterator struct { + files []string + + layout *flatLayout +} + +func (l *flatLayout) batchPath(key common.Hash) string { + return filepath.Join(l.root, EncodeStorageServiceKey(key)) +} + +func (l *flatLayout) iterateBatches() (*flatLayoutIterator, error) { + files, err := listDir(l.root) + if err != nil { + return nil, err + } + return &flatLayoutIterator{ + files: files, + layout: l, + }, nil +} + +func (i *flatLayoutIterator) next() (batchIdentifier, bool, error) { + for len(i.files) > 0 { + var f string + f, i.files = i.files[0], i.files[1:] + path := filepath.Join(i.layout.root, f) + if !isStorageServiceKey(f) { + log.Warn("Incorrectly named batch file found, ignoring", "file", path) + continue + } + key, err := 
DecodeStorageServiceKey(f) + + stat, err := os.Stat(f) + if err != nil { + return batchIdentifier{}, false, err + } + + return batchIdentifier{ + key: key, + expiry: stat.ModTime().Add(i.layout.retention), + }, true, nil + } + return batchIdentifier{}, false, nil +} + +const ( + byDataHash = "by-data-hash" + byExpiryTimestamp = "by-expiry-timestamp" + migratingSuffix = "-migrating" + expiryDivisor = 10_000 +) + +type trieLayout struct { + root string + expiryEnabled bool + + migrating bool // Is the trieLayout currently being migrated to +} + +type trieLayoutIterator struct { + firstLevel []string + secondLevel []string + files []string + + layout *trieLayout +} + +func (l *trieLayout) batchPath(key common.Hash) string { + encodedKey := EncodeStorageServiceKey(key) + firstDir := encodedKey[:2] + secondDir := encodedKey[2:4] + + topDir := byDataHash + if l.migrating { + topDir = topDir + migratingSuffix + } + + return filepath.Join(l.root, topDir, firstDir, secondDir, encodedKey) +} + +func (l *trieLayout) expiryPath(key common.Hash, expiry uint64) pathParts { + encodedKey := EncodeStorageServiceKey(key) + firstDir := fmt.Sprintf("%d", expiry/expiryDivisor) + secondDir := fmt.Sprintf("%d", expiry%expiryDivisor) + + topDir := byExpiryTimestamp + if l.migrating { + topDir = topDir + migratingSuffix + } + + return filepath.Join(l.root, topDir, firstDir, secondDir, encodedKey) +} + +func (l *trieLayout) iterateBatches() (*trieLayoutIterator, error) { + var firstLevel, secondLevel, files []string + var err error + + firstLevel, err = listDir(filepath.Join(l.root, byDataHash)) + if err != nil { + return nil, err + } + + if len(firstLevel) > 0 { + secondLevel, err = listDir(filepath.Join(l.root, byDataHash, firstLevel[0])) + if err != nil { + return nil, err + } + } + + if len(secondLevel) > 0 { + files, err = listDir(filepath.Join(l.root, byDataHash, firstLevel[0], secondLevel[0])) + if err != nil { + return nil, err + } + } + + return &trieLayoutIterator{ + firstLevel: firstLevel, + secondLevel: secondLevel, + files: files, + layout: l, + }, nil +} + +func (i *trieLayout) commitMigration() error { + if !i.migrating { + return errors.New("already finished migration") + } + + oldDir := filepath.Join(i.root, byDataHash+migratingSuffix) + newDir := filepath.Join(i.root, byDataHash) + + if err := os.Rename(oldDir, newDir); err != nil { + return fmt.Errorf("couldn't rename \"%s\" to \"%s\": %w", oldDir, newDir, err) + } + + syscall.Sync() + return nil +} + +func (i *trieLayoutIterator) next() (string, bool, error) { + for len(i.firstLevel) > 0 { + for len(i.secondLevel) > 0 { + if len(i.files) > 0 { + var f string + f, i.files = i.files[0], i.files[1:] + return filepath.Join(i.layout.root, byDataHash, i.firstLevel[0], i.secondLevel[0], f), true, nil + } + + if len(i.secondLevel) <= 1 { + return "", false, nil + } + i.secondLevel = i.secondLevel[1:] + + files, err := listDir(filepath.Join(i.layout.root, byDataHash, i.firstLevel[0], i.secondLevel[0])) + if err != nil { + return "", false, err + } + i.files = files + } + + if len(i.firstLevel) <= 1 { + return "", false, nil + } + i.firstLevel = i.firstLevel[1:] + secondLevel, err := listDir(filepath.Join(i.layout.root, byDataHash, i.firstLevel[0])) + if err != nil { + return "", false, err + } + i.secondLevel = secondLevel + } + + return "", false, nil +} From e3a9f9ebd68d5082e7c53fbaa7327b045162cc94 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Fri, 7 Jun 2024 17:00:09 -0700 Subject: [PATCH 02/72] Expiry implementation and tests --- 
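Note (not part of the diff): a minimal sketch of the path scheme this patch builds on, assuming the expiryDivisor of 10_000 introduced in the previous patch; the helper names batchPathFor and expiryPathFor are illustrative only. A batch lives under by-data-hash, bucketed by the first two hex octets of its encoded key, and an expiry index entry lives under by-expiry-timestamp, bucketed by the quotient and remainder of the Unix expiry divided by expiryDivisor.

    package main

    import (
        "fmt"
        "path/filepath"
    )

    const expiryDivisor = 10_000

    // batchPathFor mirrors the by-data-hash scheme: the first and second hex
    // octets of the encoded key become two directory levels.
    func batchPathFor(root, encodedKey string) string {
        return filepath.Join(root, "by-data-hash", encodedKey[:2], encodedKey[2:4], encodedKey)
    }

    // expiryPathFor mirrors the by-expiry-timestamp scheme: the Unix expiry is
    // split into a coarse bucket (expiry / expiryDivisor) and a remainder bucket.
    func expiryPathFor(root, encodedKey string, expiry uint64) string {
        first := fmt.Sprintf("%d", expiry/expiryDivisor)
        second := fmt.Sprintf("%d", expiry%expiryDivisor)
        return filepath.Join(root, "by-expiry-timestamp", first, second, encodedKey)
    }

    func main() {
        key := "00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff"
        fmt.Println(batchPathFor("/data", key))
        fmt.Println(expiryPathFor("/data", key, 1718236800))
    }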
das/local_file_storage_service.go | 378 +++++++++++++++++++++++------- 1 file changed, 297 insertions(+), 81 deletions(-) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 61545b9d8d..7a39a9ec37 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -13,6 +13,9 @@ import ( "path" "path/filepath" "regexp" + "strconv" + "strings" + "syscall" "time" "github.com/ethereum/go-ethereum/common" @@ -43,9 +46,12 @@ type LocalFileStorageService struct { legacyLayout flatLayout layout trieLayout + + // for testing only + enableLegacyLayout bool } -func NewLocalFileStorageService(dataDir string) (StorageService, error) { +func NewLocalFileStorageService(dataDir string) (*LocalFileStorageService, error) { if unix.Access(dataDir, unix.W_OK|unix.R_OK) != nil { return nil, fmt.Errorf("couldn't start LocalFileStorageService, directory '%s' must be readable and writeable", dataDir) } @@ -69,16 +75,29 @@ func (s *LocalFileStorageService) GetByHash(ctx context.Context, key common.Hash return data, nil } -func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, timeout uint64) error { - logPut("das.LocalFileStorageService.Store", data, timeout, s) - fileName := EncodeStorageServiceKey(dastree.Hash(data)) - finalPath := s.dataDir + "/" + fileName +func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry uint64) error { + // TODO input validation on expiry + logPut("das.LocalFileStorageService.Store", data, expiry, s) + key := dastree.Hash(data) + var batchPath string + if !s.enableLegacyLayout { + batchPath = s.layout.batchPath(key) + + if s.layout.expiryEnabled { + if err := createEmptyFile(s.layout.expiryPath(key, expiry)); err != nil { + return fmt.Errorf("Couldn't create by-expiry-path index entry: %w", err) + } + } + } else { + batchPath = s.legacyLayout.batchPath(key) + } // Use a temp file and rename to achieve atomic writes. 
- f, err := os.CreateTemp(s.dataDir, fileName) + f, err := os.CreateTemp(path.Dir(batchPath), path.Base(batchPath)) if err != nil { return err } + defer f.Close() err = f.Chmod(0o600) if err != nil { return err @@ -87,13 +106,19 @@ func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, timeout if err != nil { return err } - err = f.Close() - if err != nil { - return err - } - return os.Rename(f.Name(), finalPath) + if s.enableLegacyLayout { + tv := syscall.Timeval{ + Sec: int64(expiry - uint64(s.legacyLayout.retention.Seconds())), + Usec: 0, + } + times := []syscall.Timeval{tv, tv} + if err = syscall.Utimes(f.Name(), times); err != nil { + return err + } + } + return os.Rename(f.Name(), batchPath) } func (s *LocalFileStorageService) Sync(ctx context.Context) error { @@ -150,16 +175,12 @@ func listDir(dir string) ([]string, error) { defer d.Close() // Read all the directory entries - files, err := d.Readdir(-1) + files, err := d.Readdirnames(-1) if err != nil { return nil, err } - var fileNames []string - for _, file := range files { - fileNames = append(fileNames, file.Name()) - } - return fileNames, nil + return files, nil } var hex64Regex = regexp.MustCompile(fmt.Sprintf("^[a-fA-F0-9]{%d}$", common.HashLength*2)) @@ -211,25 +232,26 @@ func createEmptyFile(new string) error { return nil } -func migrate(fl flatLayout, tl trieLayout) error { +func migrate(fl *flatLayout, tl *trieLayout) error { flIt, err := fl.iterateBatches() if err != nil { return err } - if !tl.migrating { - return errors.New("LocalFileStorage already migrated to trieLayout") + if err = tl.startMigration(); err != nil { + return err } migrationStart := time.Now() - + var migrated, skipped, removed int err = func() error { - for batch, found, err := flIt.next(); found; batch, found, err = flIt.next() { + for batch, err := flIt.next(); err != io.EOF; batch, err = flIt.next() { if err != nil { return err } if tl.expiryEnabled && batch.expiry.Before(migrationStart) { + skipped++ continue // don't migrate expired batches } @@ -241,8 +263,11 @@ func migrate(fl flatLayout, tl trieLayout) error { if tl.expiryEnabled { expiryPath := tl.expiryPath(batch.key, uint64(batch.expiry.Unix())) - createEmptyFile(expiryPath) + if err = createEmptyFile(expiryPath); err != nil { + return err + } } + migrated++ } return tl.commitMigration() @@ -251,20 +276,80 @@ func migrate(fl flatLayout, tl trieLayout) error { return fmt.Errorf("error migrating local file store layout, retaining old layout: %w", err) } - func() { - for batch, found, err := flIt.next(); found; batch, found, err = flIt.next() { - if err != nil { - log.Warn("local file store migration completed, but error cleaning up old layout, files from that layout are now orphaned", "error", err) - return - } - toRemove := fl.batchPath(batch.key) - err = os.Remove(toRemove) - if err != nil { - log.Warn("local file store migration completed, but error cleaning up file from old layout, file is now orphaned", "file", toRemove, "error", err) + flIt, err = fl.iterateBatches() + if err != nil { + return err + } + for batch, err := flIt.next(); err != io.EOF; batch, err = flIt.next() { + if err != nil { + log.Warn("local file store migration completed, but error cleaning up old layout, files from that layout are now orphaned", "error", err) + break + } + toRemove := fl.batchPath(batch.key) + err = os.Remove(toRemove) + if err != nil { + log.Warn("local file store migration completed, but error cleaning up file from old layout, file is now orphaned", "file", toRemove, "error", 
err) + } + removed++ + } + + log.Info("Local file store legacy layout migration complete", "migratedFiles", migrated, "skippedExpiredFiles", skipped, "removedFiles", removed) + + return nil +} + +func prune(tl trieLayout, pruneTil time.Time) error { + it, err := tl.iterateBatchesByTimestamp(pruneTil) + if err != nil { + return err + } + pruned := 0 + for file, err := it.next(); err != io.EOF; file, err = it.next() { + if err != nil { + return err + } + pathByTimestamp := path.Base(file) + key, err := DecodeStorageServiceKey(path.Base(pathByTimestamp)) + if err != nil { + return err + } + pathByHash := tl.batchPath(key) + err = recursivelyDeleteUntil(pathByHash, byDataHash) + if err != nil { + if os.IsNotExist(err) { + log.Warn("Couldn't find batch to expire, it may have been previously deleted but its by-expiry-timestamp index entry still exists, trying to clean up the index next", "path", pathByHash, "indexPath", pathByTimestamp, "err", err) + + } else { + log.Error("Couldn't prune expired batch, continuing trying to prune others", "path", pathByHash, "err", err) + continue } + } - }() + err = recursivelyDeleteUntil(pathByTimestamp, byExpiryTimestamp) + if err != nil { + log.Error("Couldn't prune expired batch expiry index entry, continuing trying to prune others", "path", pathByHash, "err", err) + } + pruned++ + } + log.Info("local file store pruned expired batches", "count", pruned) + return nil +} + +func recursivelyDeleteUntil(filePath, until string) error { + err := os.Remove(filePath) + if err != nil { + return err + } + for filePath = path.Dir(filePath); path.Base(filePath) != until; filePath = path.Dir(filePath) { + err = os.Remove(filePath) + if err != nil { + if !strings.Contains(err.Error(), "directory not empty") { + log.Warn("error cleaning up empty directory when pruning expired batches", "path", filePath, "err", err) + } + break + } + } return nil } @@ -297,6 +382,10 @@ func (l *flatLayout) batchPath(key common.Hash) string { return filepath.Join(l.root, EncodeStorageServiceKey(key)) } +type layerFilter func(*[][]string, int) bool + +func noopFilter(*[][]string, int) bool { return true } + func (l *flatLayout) iterateBatches() (*flatLayoutIterator, error) { files, err := listDir(l.root) if err != nil { @@ -308,28 +397,30 @@ func (l *flatLayout) iterateBatches() (*flatLayoutIterator, error) { }, nil } -func (i *flatLayoutIterator) next() (batchIdentifier, bool, error) { +func (i *flatLayoutIterator) next() (batchIdentifier, error) { for len(i.files) > 0 { var f string f, i.files = i.files[0], i.files[1:] - path := filepath.Join(i.layout.root, f) if !isStorageServiceKey(f) { - log.Warn("Incorrectly named batch file found, ignoring", "file", path) continue } key, err := DecodeStorageServiceKey(f) + if err != nil { + return batchIdentifier{}, err + } - stat, err := os.Stat(f) + fullPath := i.layout.batchPath(key) + stat, err := os.Stat(fullPath) if err != nil { - return batchIdentifier{}, false, err + return batchIdentifier{}, err } return batchIdentifier{ key: key, expiry: stat.ModTime().Add(i.layout.retention), - }, true, nil + }, nil } - return batchIdentifier{}, false, nil + return batchIdentifier{}, io.EOF } const ( @@ -347,11 +438,10 @@ type trieLayout struct { } type trieLayoutIterator struct { - firstLevel []string - secondLevel []string - files []string - - layout *trieLayout + levels [][]string + filters []layerFilter + topDir string + layout *trieLayout } func (l *trieLayout) batchPath(key common.Hash) string { @@ -367,7 +457,7 @@ func (l *trieLayout) batchPath(key 
common.Hash) string { return filepath.Join(l.root, topDir, firstDir, secondDir, encodedKey) } -func (l *trieLayout) expiryPath(key common.Hash, expiry uint64) pathParts { +func (l *trieLayout) expiryPath(key common.Hash, expiry uint64) string { encodedKey := EncodeStorageServiceKey(key) firstDir := fmt.Sprintf("%d", expiry/expiryDivisor) secondDir := fmt.Sprintf("%d", expiry%expiryDivisor) @@ -384,6 +474,8 @@ func (l *trieLayout) iterateBatches() (*trieLayoutIterator, error) { var firstLevel, secondLevel, files []string var err error + // TODO handle stray files that aren't dirs + firstLevel, err = listDir(filepath.Join(l.root, byDataHash)) if err != nil { return nil, err @@ -403,61 +495,185 @@ func (l *trieLayout) iterateBatches() (*trieLayoutIterator, error) { } } + storageKeyFilter := func(layers *[][]string, idx int) bool { + return isStorageServiceKey((*layers)[idx][0]) + } + return &trieLayoutIterator{ - firstLevel: firstLevel, - secondLevel: secondLevel, - files: files, - layout: l, + levels: [][]string{firstLevel, secondLevel, files}, + filters: []layerFilter{noopFilter, noopFilter, storageKeyFilter}, + topDir: byDataHash, + layout: l, }, nil } -func (i *trieLayout) commitMigration() error { - if !i.migrating { +func (l *trieLayout) iterateBatchesByTimestamp(maxTimestamp time.Time) (*trieLayoutIterator, error) { + var firstLevel, secondLevel, files []string + var err error + + firstLevel, err = listDir(filepath.Join(l.root, byExpiryTimestamp)) + if err != nil { + return nil, err + } + + if len(firstLevel) > 0 { + secondLevel, err = listDir(filepath.Join(l.root, byExpiryTimestamp, firstLevel[0])) + if err != nil { + return nil, err + } + } + + if len(secondLevel) > 0 { + files, err = listDir(filepath.Join(l.root, byExpiryTimestamp, firstLevel[0], secondLevel[0])) + if err != nil { + return nil, err + } + } + + beforeUpper := func(layers *[][]string, idx int) bool { + num, err := strconv.Atoi((*layers)[idx][0]) + if err != nil { + return false + } + return int64(num) <= maxTimestamp.Unix()/expiryDivisor + } + beforeLower := func(layers *[][]string, idx int) bool { + num, err := strconv.Atoi((*layers)[idx-1][0] + (*layers)[idx][0]) + if err != nil { + return false + } + return int64(num) <= maxTimestamp.Unix() + } + storageKeyFilter := func(layers *[][]string, idx int) bool { + return isStorageServiceKey((*layers)[idx][0]) + } + + return &trieLayoutIterator{ + levels: [][]string{firstLevel, secondLevel, files}, + filters: []layerFilter{beforeUpper, beforeLower, storageKeyFilter}, + topDir: byExpiryTimestamp, + layout: l, + }, nil +} + +func (l *trieLayout) startMigration() error { + // TODO check for existing dirs + if !l.migrating { + return errors.New("Local file storage already migrated to trieLayout") + } + + if err := os.MkdirAll(filepath.Join(l.root, byDataHash+migratingSuffix), 0o700); err != nil { + return err + } + + if l.expiryEnabled { + if err := os.MkdirAll(filepath.Join(l.root, byExpiryTimestamp+migratingSuffix), 0o700); err != nil { + return err + } + } + return nil + +} + +func (l *trieLayout) commitMigration() error { + if !l.migrating { return errors.New("already finished migration") } - oldDir := filepath.Join(i.root, byDataHash+migratingSuffix) - newDir := filepath.Join(i.root, byDataHash) + removeSuffix := func(prefix string) error { + oldDir := filepath.Join(l.root, prefix+migratingSuffix) + newDir := filepath.Join(l.root, prefix) - if err := os.Rename(oldDir, newDir); err != nil { - return fmt.Errorf("couldn't rename \"%s\" to \"%s\": %w", oldDir, newDir, err) + 
if err := os.Rename(oldDir, newDir); err != nil { + return err // rename error already includes src and dst, no need to wrap + } + return nil + } + + if err := removeSuffix(byDataHash); err != nil { + return err + } + + if l.expiryEnabled { + if err := removeSuffix(byExpiryTimestamp); err != nil { + return err + } } syscall.Sync() return nil } -func (i *trieLayoutIterator) next() (string, bool, error) { - for len(i.firstLevel) > 0 { - for len(i.secondLevel) > 0 { - if len(i.files) > 0 { - var f string - f, i.files = i.files[0], i.files[1:] - return filepath.Join(i.layout.root, byDataHash, i.firstLevel[0], i.secondLevel[0], f), true, nil - } +func (it *trieLayoutIterator) next() (string, error) { + isLeaf := func(idx int) bool { + return idx == len(it.levels)-1 + } + + makePathAtLevel := func(idx int) string { + pathComponents := make([]string, idx+3) + pathComponents[0] = it.layout.root + pathComponents[1] = it.topDir + for i := 0; i <= idx; i++ { + pathComponents[i+2] = it.levels[i][0] + } + return filepath.Join(pathComponents...) + } - if len(i.secondLevel) <= 1 { - return "", false, nil + var populateNextLevel func(idx int) error + populateNextLevel = func(idx int) error { + if isLeaf(idx) || len(it.levels[idx]) == 0 { + return nil + } + nextLevelEntries, err := listDir(makePathAtLevel(idx)) + if err != nil { + return err + } + it.levels[idx+1] = nextLevelEntries + if len(nextLevelEntries) > 0 { + return populateNextLevel(idx + 1) + } + return nil + } + + advanceWithinLevel := func(idx int) error { + if len(it.levels[idx]) > 1 { + it.levels[idx] = it.levels[idx][1:] + } else { + it.levels[idx] = nil + } + + return populateNextLevel(idx) + } + + for idx := 0; idx >= 0; { + if len(it.levels[idx]) == 0 { + idx-- + continue + } + + if !it.filters[idx](&it.levels, idx) { + if err := advanceWithinLevel(idx); err != nil { + return "", err } - i.secondLevel = i.secondLevel[1:] + continue + } - files, err := listDir(filepath.Join(i.layout.root, byDataHash, i.firstLevel[0], i.secondLevel[0])) - if err != nil { - return "", false, err + if isLeaf(idx) { + path := makePathAtLevel(idx) + if err := advanceWithinLevel(idx); err != nil { + return "", err } - i.files = files + return path, nil } - if len(i.firstLevel) <= 1 { - return "", false, nil + if len(it.levels[idx+1]) > 0 { + idx++ + continue } - i.firstLevel = i.firstLevel[1:] - secondLevel, err := listDir(filepath.Join(i.layout.root, byDataHash, i.firstLevel[0])) - if err != nil { - return "", false, err + + if err := advanceWithinLevel(idx); err != nil { + return "", err } - i.secondLevel = secondLevel } - - return "", false, nil + return "", io.EOF } From 45ab8a279a9baa3b1a834e8317878c8cd75d3a2f Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Mon, 10 Jun 2024 18:12:49 -0700 Subject: [PATCH 03/72] Logic for starting expiry, tests --- das/das_test.go | 10 +- das/factory.go | 6 +- das/local_file_storage_service.go | 162 +++++++++++++++++-------- das/local_file_storage_service_test.go | 120 ++++++++++++++++++ 4 files changed, 243 insertions(+), 55 deletions(-) create mode 100644 das/local_file_storage_service_test.go diff --git a/das/das_test.go b/das/das_test.go index 950b63d9d9..0858a217da 100644 --- a/das/das_test.go +++ b/das/das_test.go @@ -40,8 +40,9 @@ func testDASStoreRetrieveMultipleInstances(t *testing.T, storageType string) { KeyDir: dbPath, }, LocalFileStorage: LocalFileStorageConfig{ - Enable: enableFileStorage, - DataDir: dbPath, + Enable: enableFileStorage, + DataDir: dbPath, + MaxRetention: 
DefaultLocalFileStorageConfig.MaxRetention, }, LocalDBStorage: dbConfig, ParentChainNodeURL: "none", @@ -129,8 +130,9 @@ func testDASMissingMessage(t *testing.T, storageType string) { KeyDir: dbPath, }, LocalFileStorage: LocalFileStorageConfig{ - Enable: enableFileStorage, - DataDir: dbPath, + Enable: enableFileStorage, + DataDir: dbPath, + MaxRetention: DefaultLocalFileStorageConfig.MaxRetention, }, LocalDBStorage: dbConfig, ParentChainNodeURL: "none", diff --git a/das/factory.go b/das/factory.go index d9eacd0ada..3ab7cc2401 100644 --- a/das/factory.go +++ b/das/factory.go @@ -35,7 +35,11 @@ func CreatePersistentStorageService( } if config.LocalFileStorage.Enable { - s, err := NewLocalFileStorageService(config.LocalFileStorage.DataDir) + s, err := NewLocalFileStorageService(config.LocalFileStorage) + if err != nil { + return nil, nil, err + } + s.start(ctx) if err != nil { return nil, nil, err } diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 7a39a9ec37..470cac88b6 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -23,48 +23,99 @@ import ( "github.com/offchainlabs/nitro/arbstate/daprovider" "github.com/offchainlabs/nitro/das/dastree" "github.com/offchainlabs/nitro/util/pretty" + "github.com/offchainlabs/nitro/util/stopwaiter" flag "github.com/spf13/pflag" "golang.org/x/sys/unix" ) type LocalFileStorageConfig struct { - Enable bool `koanf:"enable"` - DataDir string `koanf:"data-dir"` + Enable bool `koanf:"enable"` + DataDir string `koanf:"data-dir"` + EnableExpiry bool `koanf:"enable-expiry"` + MaxRetention time.Duration `koanf:"max-retention"` } var DefaultLocalFileStorageConfig = LocalFileStorageConfig{ - DataDir: "", + DataDir: "", + MaxRetention: time.Hour * 24 * 21, // 6 days longer than the batch poster default } func LocalFileStorageConfigAddOptions(prefix string, f *flag.FlagSet) { f.Bool(prefix+".enable", DefaultLocalFileStorageConfig.Enable, "enable storage/retrieval of sequencer batch data from a directory of files, one per batch") f.String(prefix+".data-dir", DefaultLocalFileStorageConfig.DataDir, "local data directory") + f.Bool(prefix+".enable-expiry", DefaultLocalFileStorageConfig.EnableExpiry, "enable expiry of batches") + f.Duration(prefix+".max-retention", DefaultLocalFileStorageConfig.MaxRetention, "store requests with expiry times farther in the future than max-retention will be rejected") } type LocalFileStorageService struct { - dataDir string + config LocalFileStorageConfig legacyLayout flatLayout layout trieLayout // for testing only enableLegacyLayout bool + + stopWaiter stopwaiter.StopWaiterSafe } -func NewLocalFileStorageService(dataDir string) (*LocalFileStorageService, error) { - if unix.Access(dataDir, unix.W_OK|unix.R_OK) != nil { - return nil, fmt.Errorf("couldn't start LocalFileStorageService, directory '%s' must be readable and writeable", dataDir) +func NewLocalFileStorageService(config LocalFileStorageConfig) (*LocalFileStorageService, error) { + if unix.Access(config.DataDir, unix.W_OK|unix.R_OK) != nil { + return nil, fmt.Errorf("couldn't start LocalFileStorageService, directory '%s' must be readable and writeable", config.DataDir) } - return &LocalFileStorageService{ - dataDir: dataDir, - legacyLayout: flatLayout{root: dataDir}, - layout: trieLayout{root: dataDir}, - }, nil + s := &LocalFileStorageService{ + config: config, + legacyLayout: flatLayout{root: config.DataDir}, + layout: trieLayout{root: config.DataDir, expiryEnabled: config.EnableExpiry}, + } + return s, nil 
+} + +// Separate start function +// Tests want to be able to avoid triggering the auto migration +func (s *LocalFileStorageService) start(ctx context.Context) error { + migrated, err := s.layout.migrated() + if err != nil { + return err + } + + if !migrated && !s.enableLegacyLayout { + if err = migrate(&s.legacyLayout, &s.layout); err != nil { + return err + } + } + + if err := s.stopWaiter.Start(ctx, s); err != nil { + return err + } + if s.config.EnableExpiry && !s.enableLegacyLayout { + err = s.stopWaiter.CallIterativelySafe(func(ctx context.Context) time.Duration { + err = s.layout.prune(time.Now()) + if err != nil { + log.Error("error pruning expired batches", "error", err) + } + return time.Minute * 5 + }) + if err != nil { + return err + } + } + return nil +} + +func (s *LocalFileStorageService) Close(ctx context.Context) error { + return s.stopWaiter.StopAndWait() } func (s *LocalFileStorageService) GetByHash(ctx context.Context, key common.Hash) ([]byte, error) { log.Trace("das.LocalFileStorageService.GetByHash", "key", pretty.PrettyHash(key), "this", s) - batchPath := s.legacyLayout.batchPath(key) + var batchPath string + if s.enableLegacyLayout { + batchPath = s.legacyLayout.batchPath(key) + } else { + batchPath = s.layout.batchPath(key) + } + data, err := os.ReadFile(batchPath) if err != nil { if errors.Is(err, os.ErrNotExist) { @@ -76,8 +127,11 @@ func (s *LocalFileStorageService) GetByHash(ctx context.Context, key common.Hash } func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry uint64) error { - // TODO input validation on expiry logPut("das.LocalFileStorageService.Store", data, expiry, s) + if time.Unix(int64(expiry), 0).After(time.Now().Add(s.config.MaxRetention)) { + return errors.New("requested expiry time exceeds maximum allowed retention period") + } + key := dastree.Hash(data) var batchPath string if !s.enableLegacyLayout { @@ -92,6 +146,11 @@ func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry u batchPath = s.legacyLayout.batchPath(key) } + err := os.MkdirAll(path.Dir(batchPath), 0o700) + if err != nil { + return fmt.Errorf("failed to create directory %s: %w", path.Dir(batchPath), err) + } + // Use a temp file and rename to achieve atomic writes. 
f, err := os.CreateTemp(path.Dir(batchPath), path.Base(batchPath)) if err != nil { @@ -125,16 +184,12 @@ func (s *LocalFileStorageService) Sync(ctx context.Context) error { return nil } -func (s *LocalFileStorageService) Close(ctx context.Context) error { - return nil -} - func (s *LocalFileStorageService) ExpirationPolicy(ctx context.Context) (daprovider.ExpirationPolicy, error) { return daprovider.KeepForever, nil } func (s *LocalFileStorageService) String() string { - return "LocalFileStorageService(" + s.dataDir + ")" + return "LocalFileStorageService(" + s.config.DataDir + ")" } func (s *LocalFileStorageService) HealthCheck(ctx context.Context) error { @@ -153,20 +208,6 @@ func (s *LocalFileStorageService) HealthCheck(ctx context.Context) error { return nil } -/* -New layout - Access data by hash -> by-data-hash/1st octet/2nd octet/hash - Store data with hash and expiry - Iterate Unordered - Iterate by Time - Prune before - - -Old layout - Access data by hash -> hash - Iterate unordered -*/ - func listDir(dir string) ([]string, error) { d, err := os.Open(dir) if err != nil { @@ -238,14 +279,23 @@ func migrate(fl *flatLayout, tl *trieLayout) error { return err } - if err = tl.startMigration(); err != nil { + batch, err := flIt.next() + if errors.Is(err, io.EOF) { + log.Info("No batches in legacy layout detected, skipping migration.") + return nil + } + if err != nil { return err } + if startErr := tl.startMigration(); startErr != nil { + return startErr + } + migrationStart := time.Now() var migrated, skipped, removed int err = func() error { - for batch, err := flIt.next(); err != io.EOF; batch, err = flIt.next() { + for ; !errors.Is(err, io.EOF); batch, err = flIt.next() { if err != nil { return err } @@ -280,7 +330,7 @@ func migrate(fl *flatLayout, tl *trieLayout) error { if err != nil { return err } - for batch, err := flIt.next(); err != io.EOF; batch, err = flIt.next() { + for batch, err := flIt.next(); !errors.Is(err, io.EOF); batch, err = flIt.next() { if err != nil { log.Warn("local file store migration completed, but error cleaning up old layout, files from that layout are now orphaned", "error", err) break @@ -298,13 +348,13 @@ func migrate(fl *flatLayout, tl *trieLayout) error { return nil } -func prune(tl trieLayout, pruneTil time.Time) error { +func (tl *trieLayout) prune(pruneTil time.Time) error { it, err := tl.iterateBatchesByTimestamp(pruneTil) if err != nil { return err } pruned := 0 - for file, err := it.next(); err != io.EOF; file, err = it.next() { + for file, err := it.next(); !errors.Is(err, io.EOF); file, err = it.next() { if err != nil { return err } @@ -331,7 +381,9 @@ func prune(tl trieLayout, pruneTil time.Time) error { } pruned++ } - log.Info("local file store pruned expired batches", "count", pruned) + if pruned > 0 { + log.Info("local file store pruned expired batches", "count", pruned) + } return nil } @@ -353,19 +405,11 @@ func recursivelyDeleteUntil(filePath, until string) error { return nil } -type batchIterator interface { - next() (batchIdentifier, bool, error) -} - type batchIdentifier struct { key common.Hash expiry time.Time } -const ( - defaultRetention = time.Hour * 24 * 7 * 3 -) - type flatLayout struct { root string @@ -434,7 +478,9 @@ type trieLayout struct { root string expiryEnabled bool - migrating bool // Is the trieLayout currently being migrated to + // Is the trieLayout currently being migrated to? + // Controls whether paths include the migratingSuffix. 
+ migrating bool } type trieLayoutIterator struct { @@ -556,12 +602,28 @@ func (l *trieLayout) iterateBatchesByTimestamp(maxTimestamp time.Time) (*trieLay }, nil } +func (l *trieLayout) migrated() (bool, error) { + info, err := os.Stat(filepath.Join(l.root, byDataHash)) + if os.IsNotExist(err) { + return false, nil + } + if err != nil { + return false, err + } + return info.IsDir(), nil +} + func (l *trieLayout) startMigration() error { - // TODO check for existing dirs - if !l.migrating { + migrated, err := l.migrated() + if err != nil { + return err + } + if migrated { return errors.New("Local file storage already migrated to trieLayout") } + l.migrating = true + if err := os.MkdirAll(filepath.Join(l.root, byDataHash+migratingSuffix), 0o700); err != nil { return err } diff --git a/das/local_file_storage_service_test.go b/das/local_file_storage_service_test.go new file mode 100644 index 0000000000..7eb6aea8f7 --- /dev/null +++ b/das/local_file_storage_service_test.go @@ -0,0 +1,120 @@ +// Copyright 2024, Offchain Labs, Inc. +// For license information, see https://github.com/nitro/blob/master/LICENSE + +package das + +import ( + "context" + "errors" + "io" + "testing" + "time" +) + +func TestMigrationNoExpiry(t *testing.T) { + dir := t.TempDir() + t.Logf("temp dir: %s", dir) + ctx := context.Background() + + config := LocalFileStorageConfig{ + Enable: true, + DataDir: dir, + EnableExpiry: false, + MaxRetention: time.Hour * 24 * 30, + } + s, err := NewLocalFileStorageService(config) + Require(t, err) + s.enableLegacyLayout = true + + now := uint64(time.Now().Unix()) + + err = s.Put(ctx, []byte("a"), now+1) + Require(t, err) + err = s.Put(ctx, []byte("b"), now+1) + Require(t, err) + err = s.Put(ctx, []byte("c"), now+2) + Require(t, err) + err = s.Put(ctx, []byte("d"), now+10) + Require(t, err) + + err = migrate(&s.legacyLayout, &s.layout) + Require(t, err) + + migrated := 0 + trIt, err := s.layout.iterateBatches() + Require(t, err) + for _, err := trIt.next(); !errors.Is(err, io.EOF); _, err = trIt.next() { + Require(t, err) + migrated++ + } + if migrated != 4 { + t.Fail() + } + + // byTimestampEntries := 0 + trIt, err = s.layout.iterateBatchesByTimestamp(time.Unix(int64(now+10), 0)) + if err == nil { + t.Fail() + } +} + +func TestMigrationExpiry(t *testing.T) { + dir := t.TempDir() + ctx := context.Background() + + config := LocalFileStorageConfig{ + Enable: true, + DataDir: dir, + EnableExpiry: true, + MaxRetention: time.Hour * 24 * 30, + } + s, err := NewLocalFileStorageService(config) + Require(t, err) + s.enableLegacyLayout = true + + now := uint64(time.Now().Unix()) + + err = s.Put(ctx, []byte("a"), now-expiryDivisor*2) + Require(t, err) + err = s.Put(ctx, []byte("b"), now-expiryDivisor) + Require(t, err) + err = s.Put(ctx, []byte("c"), now+expiryDivisor) + Require(t, err) + err = s.Put(ctx, []byte("d"), now+expiryDivisor) + Require(t, err) + err = s.Put(ctx, []byte("e"), now+expiryDivisor*2) + Require(t, err) + + s.layout.expiryEnabled = true + err = migrate(&s.legacyLayout, &s.layout) + Require(t, err) + + migrated := 0 + trIt, err := s.layout.iterateBatches() + Require(t, err) + for _, err := trIt.next(); !errors.Is(err, io.EOF); _, err = trIt.next() { + Require(t, err) + migrated++ + } + if migrated != 3 { + t.Fail() + } + + countTimestampEntries := func(cutoff, expected uint64) { + var byTimestampEntries uint64 + trIt, err = s.layout.iterateBatchesByTimestamp(time.Unix(int64(cutoff), 0)) + Require(t, err) + for batch, err := trIt.next(); !errors.Is(err, io.EOF); batch, err = 
trIt.next() { + Require(t, err) + t.Logf("indexCreated %s", batch) + byTimestampEntries++ + } + if byTimestampEntries != expected { + t.Fail() + } + } + + countTimestampEntries(now, 0) // They should have all been filtered out since they're after now + countTimestampEntries(now+expiryDivisor, 2) + countTimestampEntries(now+expiryDivisor*2, 3) +} From 088c380d1cabf7bec22c96c3866785fffe819d5b Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Tue, 11 Jun 2024 17:42:20 -0700 Subject: [PATCH 04/72] fix l1SyncService retention and expiry Make it so that the l1SyncService can sync expired data and set also set sensible retention. --- das/local_file_storage_service.go | 2 +- das/storage_service.go | 3 +++ das/syncing_fallback_storage.go | 10 ++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 470cac88b6..bd907a3645 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -37,7 +37,7 @@ type LocalFileStorageConfig struct { var DefaultLocalFileStorageConfig = LocalFileStorageConfig{ DataDir: "", - MaxRetention: time.Hour * 24 * 21, // 6 days longer than the batch poster default + MaxRetention: defaultStorageRetention, } func LocalFileStorageConfigAddOptions(prefix string, f *flag.FlagSet) { diff --git a/das/storage_service.go b/das/storage_service.go index 806e80dba5..b7526077e9 100644 --- a/das/storage_service.go +++ b/das/storage_service.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "strings" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" @@ -25,6 +26,8 @@ type StorageService interface { HealthCheck(ctx context.Context) error } +const defaultStorageRetention = time.Hour * 24 * 21 // 6 days longer than the batch poster default + func EncodeStorageServiceKey(key common.Hash) string { return key.Hex()[2:] } diff --git a/das/syncing_fallback_storage.go b/das/syncing_fallback_storage.go index 411e7a1977..000c16ff09 100644 --- a/das/syncing_fallback_storage.go +++ b/das/syncing_fallback_storage.go @@ -8,7 +8,6 @@ import ( "encoding/binary" "errors" "fmt" - "math" "math/big" "os" "sync" @@ -65,17 +64,19 @@ type SyncToStorageConfig struct { IgnoreWriteErrors bool `koanf:"ignore-write-errors"` ParentChainBlocksPerRead uint64 `koanf:"parent-chain-blocks-per-read"` StateDir string `koanf:"state-dir"` + SyncExpiredData bool `koanf:"sync-expired-data"` } var DefaultSyncToStorageConfig = SyncToStorageConfig{ CheckAlreadyExists: true, Eager: false, EagerLowerBoundBlock: 0, - RetentionPeriod: time.Duration(math.MaxInt64), + RetentionPeriod: defaultStorageRetention, DelayOnError: time.Second, IgnoreWriteErrors: true, ParentChainBlocksPerRead: 100, StateDir: "", + SyncExpiredData: true, } func SyncToStorageConfigAddOptions(prefix string, f *flag.FlagSet) { @@ -83,10 +84,11 @@ func SyncToStorageConfigAddOptions(prefix string, f *flag.FlagSet) { f.Bool(prefix+".eager", DefaultSyncToStorageConfig.Eager, "eagerly sync batch data to this DAS's storage from the rest endpoints, using L1 as the index of batch data hashes; otherwise only sync lazily") f.Uint64(prefix+".eager-lower-bound-block", DefaultSyncToStorageConfig.EagerLowerBoundBlock, "when eagerly syncing, start indexing forward from this L1 block. 
Only used if there is no sync state") f.Uint64(prefix+".parent-chain-blocks-per-read", DefaultSyncToStorageConfig.ParentChainBlocksPerRead, "when eagerly syncing, max l1 blocks to read per poll") - f.Duration(prefix+".retention-period", DefaultSyncToStorageConfig.RetentionPeriod, "period to retain synced data (defaults to forever)") + f.Duration(prefix+".retention-period", DefaultSyncToStorageConfig.RetentionPeriod, "period to request storage to retain synced data") f.Duration(prefix+".delay-on-error", DefaultSyncToStorageConfig.DelayOnError, "time to wait if encountered an error before retrying") f.Bool(prefix+".ignore-write-errors", DefaultSyncToStorageConfig.IgnoreWriteErrors, "log only on failures to write when syncing; otherwise treat it as an error") f.String(prefix+".state-dir", DefaultSyncToStorageConfig.StateDir, "directory to store the sync state in, ie the block number currently synced up to, so that we don't sync from scratch each time") + f.Bool(prefix+".sync-expired-data", DefaultSyncToStorageConfig.SyncExpiredData, "sync even data that is expired; needed for mirror configuration") } type l1SyncService struct { @@ -192,7 +194,7 @@ func (s *l1SyncService) processBatchDelivered(ctx context.Context, batchDelivere } log.Info("BatchDelivered", "log", batchDeliveredLog, "event", deliveredEvent) storeUntil := arbmath.SaturatingUAdd(deliveredEvent.TimeBounds.MaxTimestamp, uint64(s.config.RetentionPeriod.Seconds())) - if storeUntil < uint64(time.Now().Unix()) { + if !s.config.SyncExpiredData && storeUntil < uint64(time.Now().Unix()) { // old batch - no need to store return nil } From 7596da61a4a7af76fce22ad52336e2324946da84 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 12 Jun 2024 11:00:18 -0700 Subject: [PATCH 05/72] Plumb retention period to legacy layout Also fix unsetting the migrating flag after migration is done. 
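A minimal sketch of how the plumbed-through retention value is used by the legacy flat layout, which keeps no expiry index and so treats a file's modification time plus the maximum retention as its expiry (as in flatLayoutIterator.next); deriveExpiry is an illustrative name, not a function added by this change:

    package main

    import (
        "fmt"
        "os"
        "time"
    )

    // deriveExpiry approximates an entry's expiry in the flat layout as the
    // batch file's modification time plus the configured maximum retention
    // (the value now passed in from LocalFileStorageConfig.MaxRetention).
    func deriveExpiry(batchPath string, retention time.Duration) (time.Time, error) {
        stat, err := os.Stat(batchPath)
        if err != nil {
            return time.Time{}, err
        }
        return stat.ModTime().Add(retention), nil
    }

    func main() {
        expiry, err := deriveExpiry("/tmp/some-batch-file", 21*24*time.Hour)
        if err != nil {
            fmt.Println("stat failed:", err)
            return
        }
        fmt.Println("treated as expiring at", expiry)
    }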
--- das/factory.go | 2 +- das/local_file_storage_service.go | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/das/factory.go b/das/factory.go index 3ab7cc2401..8f6432234d 100644 --- a/das/factory.go +++ b/das/factory.go @@ -39,7 +39,7 @@ func CreatePersistentStorageService( if err != nil { return nil, nil, err } - s.start(ctx) + err = s.start(ctx) if err != nil { return nil, nil, err } diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index bd907a3645..0b731bbdda 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -65,7 +65,7 @@ func NewLocalFileStorageService(config LocalFileStorageConfig) (*LocalFileStorag } s := &LocalFileStorageService{ config: config, - legacyLayout: flatLayout{root: config.DataDir}, + legacyLayout: flatLayout{root: config.DataDir, retention: config.MaxRetention}, layout: trieLayout{root: config.DataDir, expiryEnabled: config.EnableExpiry}, } return s, nil @@ -128,8 +128,10 @@ func (s *LocalFileStorageService) GetByHash(ctx context.Context, key common.Hash func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry uint64) error { logPut("das.LocalFileStorageService.Store", data, expiry, s) - if time.Unix(int64(expiry), 0).After(time.Now().Add(s.config.MaxRetention)) { - return errors.New("requested expiry time exceeds maximum allowed retention period") + expiryTime := time.Unix(int64(expiry), 0) + currentTimePlusRetention := time.Now().Add(s.config.MaxRetention) + if expiryTime.After(currentTimePlusRetention) { + return fmt.Errorf("requested expiry time (%v) exceeds current time plus maximum allowed retention period(%v)", expiryTime, currentTimePlusRetention) } key := dastree.Hash(data) @@ -343,7 +345,7 @@ func migrate(fl *flatLayout, tl *trieLayout) error { removed++ } - log.Info("Local file store legacy layout migration complete", "migratedFiles", migrated, "skippedExpiredFiles", skipped, "removedFiles", removed) + log.Info("Local file store legacy layout migration complete", "migratedFiles", migrated, "skippedExpiredFiles", skipped, "removedFiles", removed, "duration", time.Since(migrationStart)) return nil } @@ -354,6 +356,7 @@ func (tl *trieLayout) prune(pruneTil time.Time) error { return err } pruned := 0 + pruningStart := time.Now() for file, err := it.next(); !errors.Is(err, io.EOF); file, err = it.next() { if err != nil { return err @@ -382,7 +385,7 @@ func (tl *trieLayout) prune(pruneTil time.Time) error { pruned++ } if pruned > 0 { - log.Info("local file store pruned expired batches", "count", pruned) + log.Info("local file store pruned expired batches", "count", pruned, "pruneTil", pruneTil, "duration", time.Since(pruningStart)) } return nil } @@ -663,6 +666,10 @@ func (l *trieLayout) commitMigration() error { } syscall.Sync() + + // Done migrating + l.migrating = false + return nil } From 13f79586319e5b86943672084218ecf0215327ac Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 12 Jun 2024 13:12:23 -0700 Subject: [PATCH 06/72] Switch to ref counted expiry using hard links We can have batches with identical data, so the trieLayout needs to be able to handle being requested to store the same batch multiple times with different expiry times. The way it works now is that the leaf files in by-expiry-timestamp are hard links to the batch files in by-data-hash. The link count is used to know when there are no more links in by-expiry-timestamp pointing to the batch file and that it can be deleted. 
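A minimal sketch of the link-count check described above, assuming a Unix filesystem where os.FileInfo.Sys() yields a *syscall.Stat_t; the name canDeleteBatchFile is illustrative and not part of this change. A link count of 1 means no by-expiry-timestamp entries still reference the batch file's inode, so the by-data-hash copy can be removed:

    package main

    import (
        "fmt"
        "os"
        "syscall"
    )

    // canDeleteBatchFile reports whether the by-data-hash copy of a batch has
    // no remaining hard links under by-expiry-timestamp: a link count of 1
    // means only the by-data-hash entry itself still references the inode.
    func canDeleteBatchFile(pathByHash string) (bool, error) {
        info, err := os.Stat(pathByHash)
        if err != nil {
            return false, err
        }
        stat, ok := info.Sys().(*syscall.Stat_t)
        if !ok {
            return false, fmt.Errorf("no Stat_t available for %s", pathByHash)
        }
        return stat.Nlink == 1, nil
    }

    func main() {
        ok, err := canDeleteBatchFile("/tmp/example-batch")
        fmt.Println(ok, err)
    }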
--- das/local_file_storage_service.go | 98 +++++++++++++++++++++++-------- 1 file changed, 72 insertions(+), 26 deletions(-) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 0b731bbdda..10298e6826 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -15,6 +15,7 @@ import ( "regexp" "strconv" "strings" + "sync" "syscall" "time" @@ -137,13 +138,9 @@ func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry u key := dastree.Hash(data) var batchPath string if !s.enableLegacyLayout { + s.layout.writeMutex.Lock() + defer s.layout.writeMutex.Unlock() batchPath = s.layout.batchPath(key) - - if s.layout.expiryEnabled { - if err := createEmptyFile(s.layout.expiryPath(key, expiry)); err != nil { - return fmt.Errorf("Couldn't create by-expiry-path index entry: %w", err) - } - } } else { batchPath = s.legacyLayout.batchPath(key) } @@ -158,7 +155,15 @@ func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry u if err != nil { return err } - defer f.Close() + renamed := false + defer func() { + _ = f.Close() + if !renamed { + if err := os.Remove(f.Name()); err != nil { + log.Error("Couldn't clean up temporary file", "file", f.Name()) + } + } + }() err = f.Chmod(0o600) if err != nil { return err @@ -179,7 +184,25 @@ func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry u } } - return os.Rename(f.Name(), batchPath) + _, err = os.Stat(batchPath) + if err != nil { + if os.IsNotExist(err) { + if err = os.Rename(f.Name(), batchPath); err != nil { + return err + } + renamed = true + } else { + return err + } + } + + if !s.enableLegacyLayout && s.layout.expiryEnabled { + if err := createHardLink(batchPath, s.layout.expiryPath(key, expiry)); err != nil { + return fmt.Errorf("couldn't create by-expiry-path index entry: %w", err) + } + } + + return nil } func (s *LocalFileStorageService) Sync(ctx context.Context) error { @@ -261,20 +284,22 @@ func copyFile(new, orig string) error { } // Creates an empty file, making any directories needed in the new file's path. -func createEmptyFile(new string) error { +func createHardLink(orig, new string) error { err := os.MkdirAll(path.Dir(new), 0o700) if err != nil { - return fmt.Errorf("failed to create directory %s: %w", path.Dir(new), err) + return err } - file, err := os.OpenFile(new, os.O_CREATE|os.O_WRONLY, 0o600) + err = os.Link(orig, new) if err != nil { - return fmt.Errorf("failed to create file %s: %w", new, err) + return err } - file.Close() return nil } +// migrate converts a file store from flatLayout to trieLayout. +// It is not thread safe and must be run before Put requests are served. +// The expiry index is only created if expiry is enabled. 
func migrate(fl *flatLayout, tl *trieLayout) error { flIt, err := fl.iterateBatches() if err != nil { @@ -304,6 +329,7 @@ func migrate(fl *flatLayout, tl *trieLayout) error { if tl.expiryEnabled && batch.expiry.Before(migrationStart) { skipped++ + log.Debug("skipping expired batch during migration", "expiry", batch.expiry, "start", migrationStart) continue // don't migrate expired batches } @@ -315,7 +341,7 @@ func migrate(fl *flatLayout, tl *trieLayout) error { if tl.expiryEnabled { expiryPath := tl.expiryPath(batch.key, uint64(batch.expiry.Unix())) - if err = createEmptyFile(expiryPath); err != nil { + if err = createHardLink(newPath, expiryPath); err != nil { return err } } @@ -351,37 +377,49 @@ func migrate(fl *flatLayout, tl *trieLayout) error { } func (tl *trieLayout) prune(pruneTil time.Time) error { + tl.writeMutex.Lock() + defer tl.writeMutex.Unlock() it, err := tl.iterateBatchesByTimestamp(pruneTil) if err != nil { return err } pruned := 0 pruningStart := time.Now() - for file, err := it.next(); !errors.Is(err, io.EOF); file, err = it.next() { + for pathByTimestamp, err := it.next(); !errors.Is(err, io.EOF); pathByTimestamp, err = it.next() { if err != nil { return err } - pathByTimestamp := path.Base(file) key, err := DecodeStorageServiceKey(path.Base(pathByTimestamp)) if err != nil { return err } + err = recursivelyDeleteUntil(pathByTimestamp, byExpiryTimestamp) + if err != nil { + log.Error("Couldn't prune expired batch expiry index entry, continuing trying to prune others", "path", pathByTimestamp, "err", err) + } + pathByHash := tl.batchPath(key) - err = recursivelyDeleteUntil(pathByHash, byDataHash) + info, err := os.Stat(pathByHash) if err != nil { if os.IsNotExist(err) { - log.Warn("Couldn't find batch to expire, it may have been previously deleted but its by-expiry-timestamp index entry still exists, trying to clean up the index next", "path", pathByHash, "indexPath", pathByTimestamp, "err", err) - + log.Warn("Couldn't find batch to expire, it may have been previously deleted but its by-expiry-timestamp index entry still existed, deleting its index entry and continuing", "path", pathByHash, "indexPath", pathByTimestamp, "err", err) } else { log.Error("Couldn't prune expired batch, continuing trying to prune others", "path", pathByHash, "err", err) - continue } - + continue } - err = recursivelyDeleteUntil(pathByTimestamp, byExpiryTimestamp) - if err != nil { - log.Error("Couldn't prune expired batch expiry index entry, continuing trying to prune others", "path", pathByHash, "err", err) + stat, ok := info.Sys().(*syscall.Stat_t) + if !ok { + log.Error("Couldn't convert file stats to Stat_t struct, possible OS or filesystem incompatibility, skipping pruning this batch", "file", pathByHash) + continue } + if stat.Nlink == 1 { + err = recursivelyDeleteUntil(pathByHash, byDataHash) + if err != nil { + + } + } + pruned++ } if pruned > 0 { @@ -477,6 +515,8 @@ const ( expiryDivisor = 10_000 ) +var expirySecondPartWidth = len(strconv.Itoa(expiryDivisor)) - 1 + type trieLayout struct { root string expiryEnabled bool @@ -484,6 +524,12 @@ type trieLayout struct { // Is the trieLayout currently being migrated to? // Controls whether paths include the migratingSuffix. migrating bool + + // Anything changing the layout (pruning, adding files) must go through + // this mutex. + // Pruning the entire history at statup of Arb Nova as of 2024-06-12 takes + // 5s on my laptop, so the overhead of pruning after startup should be neglibile. 
+ writeMutex sync.Mutex } type trieLayoutIterator struct { @@ -509,7 +555,7 @@ func (l *trieLayout) batchPath(key common.Hash) string { func (l *trieLayout) expiryPath(key common.Hash, expiry uint64) string { encodedKey := EncodeStorageServiceKey(key) firstDir := fmt.Sprintf("%d", expiry/expiryDivisor) - secondDir := fmt.Sprintf("%d", expiry%expiryDivisor) + secondDir := fmt.Sprintf("%0*d", expirySecondPartWidth, expiry%expiryDivisor) topDir := byExpiryTimestamp if l.migrating { @@ -622,7 +668,7 @@ func (l *trieLayout) startMigration() error { return err } if migrated { - return errors.New("Local file storage already migrated to trieLayout") + return errors.New("local file storage already migrated to trieLayout") } l.migrating = true From 929c9c08853f33e2e43828ad9a198faff2893110 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 12 Jun 2024 17:49:59 -0700 Subject: [PATCH 07/72] Unit test for handling of duplicates with expiry --- das/local_file_storage_service.go | 5 +- das/local_file_storage_service_test.go | 186 ++++++++++++++++++------- 2 files changed, 143 insertions(+), 48 deletions(-) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 10298e6826..20d8e21ea2 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -173,6 +173,9 @@ func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry u return err } + // For testing only. When migrating we treat the expiry time of existing flat layout + // files to be the modification time + the max allowed retention. So when creating + // new flat layout files, set their modification time accordingly. if s.enableLegacyLayout { tv := syscall.Timeval{ Sec: int64(expiry - uint64(s.legacyLayout.retention.Seconds())), @@ -637,7 +640,7 @@ func (l *trieLayout) iterateBatchesByTimestamp(maxTimestamp time.Time) (*trieLay if err != nil { return false } - return int64(num) <= maxTimestamp.Unix() + return int64(num) < maxTimestamp.Unix() } storageKeyFilter := func(layers *[][]string, idx int) bool { return isStorageServiceKey((*layers)[idx][0]) diff --git a/das/local_file_storage_service_test.go b/das/local_file_storage_service_test.go index 7eb6aea8f7..59e2f61a59 100644 --- a/das/local_file_storage_service_test.go +++ b/das/local_file_storage_service_test.go @@ -4,16 +4,68 @@ package das import ( + "bytes" "context" "errors" "io" "testing" "time" + + "github.com/offchainlabs/nitro/das/dastree" ) +func getByHashAndCheck(t *testing.T, s *LocalFileStorageService, xs ...string) { + t.Helper() + ctx := context.Background() + + for _, x := range xs { + actual, err := s.GetByHash(ctx, dastree.Hash([]byte(x))) + Require(t, err) + if !bytes.Equal([]byte(x), actual) { + Fail(t, "unexpected result") + } + } +} + +func countEntries(t *testing.T, layout *trieLayout, expected int) { + t.Helper() + + count := 0 + trIt, err := layout.iterateBatches() + Require(t, err) + for _, err := trIt.next(); !errors.Is(err, io.EOF); _, err = trIt.next() { + Require(t, err) + count++ + } + if count != expected { + Fail(t, "unexpected number of batches", "expected", expected, "was", count) + } +} + +func countTimestampEntries(t *testing.T, layout *trieLayout, cutoff time.Time, expected int) { + t.Helper() + var count int + trIt, err := layout.iterateBatchesByTimestamp(cutoff) + Require(t, err) + for _, err := trIt.next(); !errors.Is(err, io.EOF); _, err = trIt.next() { + Require(t, err) + count++ + } + if count != expected { + Fail(t, "unexpected count of entries when iterating by 
timestamp", "expected", expected, "was", count) + } +} + +func pruneCountRemaining(t *testing.T, layout *trieLayout, pruneTil time.Time, expected int) { + t.Helper() + err := layout.prune(pruneTil) + Require(t, err) + + countEntries(t, layout, expected) +} + func TestMigrationNoExpiry(t *testing.T) { dir := t.TempDir() - t.Logf("temp dir: %s", dir) ctx := context.Background() config := LocalFileStorageConfig{ @@ -37,24 +89,18 @@ func TestMigrationNoExpiry(t *testing.T) { err = s.Put(ctx, []byte("d"), now+10) Require(t, err) + getByHashAndCheck(t, s, "a", "b", "c", "d") + err = migrate(&s.legacyLayout, &s.layout) Require(t, err) + s.enableLegacyLayout = false - migrated := 0 - trIt, err := s.layout.iterateBatches() - Require(t, err) - for _, err := trIt.next(); !errors.Is(err, io.EOF); _, err = trIt.next() { - Require(t, err) - migrated++ - } - if migrated != 4 { - t.Fail() - } + countEntries(t, &s.layout, 4) + getByHashAndCheck(t, s, "a", "b", "c", "d") - // byTimestampEntries := 0 - trIt, err = s.layout.iterateBatchesByTimestamp(time.Unix(int64(now+10), 0)) + _, err = s.layout.iterateBatchesByTimestamp(time.Unix(int64(now+10), 0)) if err == nil { - t.Fail() + Fail(t, "can't iterate by timestamp when expiry is disabled") } } @@ -66,55 +112,101 @@ func TestMigrationExpiry(t *testing.T) { Enable: true, DataDir: dir, EnableExpiry: true, - MaxRetention: time.Hour * 24 * 30, + MaxRetention: time.Hour * 10, } s, err := NewLocalFileStorageService(config) Require(t, err) s.enableLegacyLayout = true - now := uint64(time.Now().Unix()) + now := time.Now() - err = s.Put(ctx, []byte("a"), now-expiryDivisor*2) + // Use increments of expiry divisor in order to span multiple by-expiry-timestamp dirs + err = s.Put(ctx, []byte("a"), uint64(now.Add(-2*time.Second*expiryDivisor).Unix())) Require(t, err) - err = s.Put(ctx, []byte("b"), now-expiryDivisor) + err = s.Put(ctx, []byte("b"), uint64(now.Add(-1*time.Second*expiryDivisor).Unix())) Require(t, err) - err = s.Put(ctx, []byte("c"), now+expiryDivisor) + err = s.Put(ctx, []byte("c"), uint64(now.Add(time.Second*expiryDivisor).Unix())) Require(t, err) - err = s.Put(ctx, []byte("d"), now+expiryDivisor) + err = s.Put(ctx, []byte("d"), uint64(now.Add(time.Second*expiryDivisor).Unix())) Require(t, err) - err = s.Put(ctx, []byte("e"), now+expiryDivisor*2) + err = s.Put(ctx, []byte("e"), uint64(now.Add(2*time.Second*expiryDivisor).Unix())) Require(t, err) - s.layout.expiryEnabled = true + getByHashAndCheck(t, s, "a", "b", "c", "d", "e") + err = migrate(&s.legacyLayout, &s.layout) Require(t, err) + s.enableLegacyLayout = false - migrated := 0 - trIt, err := s.layout.iterateBatches() - Require(t, err) - for _, err := trIt.next(); !errors.Is(err, io.EOF); _, err = trIt.next() { - Require(t, err) - migrated++ - } - if migrated != 3 { - t.Fail() - } + countEntries(t, &s.layout, 3) + getByHashAndCheck(t, s, "c", "d", "e") - countTimestampEntries := func(cutoff, expected uint64) { - var byTimestampEntries uint64 - trIt, err = s.layout.iterateBatchesByTimestamp(time.Unix(int64(cutoff), 0)) - Require(t, err) - for batch, err := trIt.next(); !errors.Is(err, io.EOF); batch, err = trIt.next() { - Require(t, err) - t.Logf("indexCreated %s", batch) - byTimestampEntries++ - } - if byTimestampEntries != expected { - t.Fail() - } + afterNow := now.Add(time.Second) + countTimestampEntries(t, &s.layout, afterNow, 0) // They should have all been filtered out since they're after now + countTimestampEntries(t, &s.layout, afterNow.Add(time.Second*expiryDivisor), 2) + 
countTimestampEntries(t, &s.layout, afterNow.Add(2*time.Second*expiryDivisor), 3) + + pruneCountRemaining(t, &s.layout, afterNow, 3) + getByHashAndCheck(t, s, "c", "d", "e") + + pruneCountRemaining(t, &s.layout, afterNow.Add(time.Second*expiryDivisor), 1) + getByHashAndCheck(t, s, "e") + + pruneCountRemaining(t, &s.layout, afterNow.Add(2*time.Second*expiryDivisor), 0) +} + +func TestExpiryDuplicates(t *testing.T) { + dir := t.TempDir() + ctx := context.Background() + + config := LocalFileStorageConfig{ + Enable: true, + DataDir: dir, + EnableExpiry: true, + MaxRetention: time.Hour * 10, } + s, err := NewLocalFileStorageService(config) + Require(t, err) + + now := time.Now() + + // Use increments of expiry divisor in order to span multiple by-expiry-timestamp dirs + err = s.Put(ctx, []byte("a"), uint64(now.Add(-2*time.Second*expiryDivisor).Unix())) + Require(t, err) + err = s.Put(ctx, []byte("a"), uint64(now.Add(-1*time.Second*expiryDivisor).Unix())) + Require(t, err) + err = s.Put(ctx, []byte("a"), uint64(now.Add(time.Second*expiryDivisor).Unix())) + Require(t, err) + err = s.Put(ctx, []byte("d"), uint64(now.Add(time.Second*expiryDivisor).Unix())) + Require(t, err) + err = s.Put(ctx, []byte("e"), uint64(now.Add(2*time.Second*expiryDivisor).Unix())) + Require(t, err) + err = s.Put(ctx, []byte("f"), uint64(now.Add(3*time.Second*expiryDivisor).Unix())) + Require(t, err) + + afterNow := now.Add(time.Second) + // "a" is duplicated + countEntries(t, &s.layout, 4) + // There should be a timestamp entry for each time "a" was added + countTimestampEntries(t, &s.layout, afterNow.Add(1000*time.Hour), 6) + + // We've expired the first "a", but there are still 2 other timestamp entries for it + pruneCountRemaining(t, &s.layout, afterNow.Add(-2*time.Second*expiryDivisor), 4) + countTimestampEntries(t, &s.layout, afterNow.Add(1000*time.Hour), 5) + + // We've expired the second "a", but there is still 1 other timestamp entry for it + pruneCountRemaining(t, &s.layout, afterNow.Add(-1*time.Second*expiryDivisor), 4) + countTimestampEntries(t, &s.layout, afterNow.Add(1000*time.Hour), 4) + + // We've expired the third "a", and also "d" + pruneCountRemaining(t, &s.layout, afterNow.Add(time.Second*expiryDivisor), 2) + countTimestampEntries(t, &s.layout, afterNow.Add(1000*time.Hour), 2) + + // We've expired the "e" + pruneCountRemaining(t, &s.layout, afterNow.Add(2*time.Second*expiryDivisor), 1) + countTimestampEntries(t, &s.layout, afterNow.Add(1000*time.Hour), 1) - countTimestampEntries(now, 0) // They should have all been filtered out since they're after now - countTimestampEntries(now+expiryDivisor, 2) - countTimestampEntries(now+expiryDivisor*2, 3) + // We've expired the "f" + pruneCountRemaining(t, &s.layout, afterNow.Add(3*time.Second*expiryDivisor), 0) + countTimestampEntries(t, &s.layout, afterNow.Add(1000*time.Hour), 0) } From 92aa569c8572c5da78ea3bda64cf6af3c621cdf6 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 12 Jun 2024 22:46:43 -0700 Subject: [PATCH 08/72] Fix case of exactly same batch and expiry time --- das/local_file_storage_service.go | 23 +++++++++++++++++++---- das/local_file_storage_service_test.go | 3 +++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 20d8e21ea2..de3a1379da 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -286,18 +286,33 @@ func copyFile(new, orig string) error { return nil } -// Creates an empty file, making any directories needed 
in the new file's path. +// Creates a hard link at new, to orig, making any directories needed in the new link's path. func createHardLink(orig, new string) error { err := os.MkdirAll(path.Dir(new), 0o700) if err != nil { return err } - err = os.Link(orig, new) + info, err := os.Stat(new) if err != nil { - return err + if os.IsNotExist(err) { + err = os.Link(orig, new) + if err != nil { + return err + } + return nil + } else { + return err + } } - return nil + + // Hard link already exists + stat, ok := info.Sys().(*syscall.Stat_t) + if ok && stat.Nlink > 1 { + return nil + } + + return fmt.Errorf("file exists but is not a hard link: %s", new) } // migrate converts a file store from flatLayout to trieLayout. diff --git a/das/local_file_storage_service_test.go b/das/local_file_storage_service_test.go index 59e2f61a59..0b2ba9749d 100644 --- a/das/local_file_storage_service_test.go +++ b/das/local_file_storage_service_test.go @@ -183,6 +183,9 @@ func TestExpiryDuplicates(t *testing.T) { Require(t, err) err = s.Put(ctx, []byte("f"), uint64(now.Add(3*time.Second*expiryDivisor).Unix())) Require(t, err) + // Put the same entry and expiry again, should have no effect + err = s.Put(ctx, []byte("f"), uint64(now.Add(3*time.Second*expiryDivisor).Unix())) + Require(t, err) afterNow := now.Add(time.Second) // "a" is duplicated From 9c8b14646b026317b3fc3f339011768f3c261771 Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Thu, 13 Jun 2024 16:24:36 -0500 Subject: [PATCH 09/72] get machine hashes with step size --- system_tests/validation_mock_test.go | 5 ++ validator/client/validation_client.go | 11 ++++ validator/interface.go | 1 + validator/server_arb/execution_run.go | 80 +++++++++++++++++++++++++++ validator/valnode/validation_api.go | 13 +++++ 5 files changed, 110 insertions(+) diff --git a/system_tests/validation_mock_test.go b/system_tests/validation_mock_test.go index fb4f868571..38beee021b 100644 --- a/system_tests/validation_mock_test.go +++ b/system_tests/validation_mock_test.go @@ -126,6 +126,11 @@ func (r *mockExecRun) GetStepAt(position uint64) containers.PromiseInterface[*va }, nil) } +func (r *mockExecRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, numDesiredLeaves, fromBatch uint64) containers.PromiseInterface[[]common.Hash] { + // TODO: Add mock implementation for GetMachineHashesWithStepSize + return containers.NewReadyPromise[[]common.Hash](nil, nil) +} + func (r *mockExecRun) GetLastStep() containers.PromiseInterface[*validator.MachineStepResult] { return r.GetStepAt(mockExecLastPos) } diff --git a/validator/client/validation_client.go b/validator/client/validation_client.go index fa6b9000f2..c454cefa3c 100644 --- a/validator/client/validation_client.go +++ b/validator/client/validation_client.go @@ -197,6 +197,17 @@ func (r *ExecutionClientRun) GetStepAt(pos uint64) containers.PromiseInterface[* }) } +func (r *ExecutionClientRun) GetMachineHashesWithStepSize(fromBatch, machineStartIndex, stepSize, numDesiredLeaves uint64) containers.PromiseInterface[[]common.Hash] { + return stopwaiter.LaunchPromiseThread[[]common.Hash](r, func(ctx context.Context) ([]common.Hash, error) { + var resJson []common.Hash + err := r.client.client.CallContext(ctx, &resJson, server_api.Namespace+"_getMachineHashesWithStepSize", r.id, fromBatch, machineStartIndex, stepSize, numDesiredLeaves) + if err != nil { + return nil, err + } + return resJson, err + }) +} + func (r *ExecutionClientRun) GetProofAt(pos uint64) containers.PromiseInterface[[]byte] { return 
stopwaiter.LaunchPromiseThread[[]byte](r, func(ctx context.Context) ([]byte, error) { var resString string diff --git a/validator/interface.go b/validator/interface.go index 0324b996ed..1ae0b2ac69 100644 --- a/validator/interface.go +++ b/validator/interface.go @@ -30,6 +30,7 @@ type ExecutionSpawner interface { type ExecutionRun interface { GetStepAt(uint64) containers.PromiseInterface[*MachineStepResult] + GetMachineHashesWithStepSize(fromBatch, machineStartIndex, stepSize, numDesiredLeaves uint64) containers.PromiseInterface[[]common.Hash] GetLastStep() containers.PromiseInterface[*MachineStepResult] GetProofAt(uint64) containers.PromiseInterface[[]byte] PrepareRange(uint64, uint64) containers.PromiseInterface[struct{}] diff --git a/validator/server_arb/execution_run.go b/validator/server_arb/execution_run.go index 255d42ab16..8893d0b3da 100644 --- a/validator/server_arb/execution_run.go +++ b/validator/server_arb/execution_run.go @@ -7,7 +7,10 @@ import ( "context" "fmt" "sync" + "time" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/log" "github.com/offchainlabs/nitro/util/containers" "github.com/offchainlabs/nitro/util/stopwaiter" "github.com/offchainlabs/nitro/validator" @@ -79,6 +82,83 @@ func (e *executionRun) GetStepAt(position uint64) containers.PromiseInterface[*v }) } +func (e *executionRun) GetMachineHashesWithStepSize(fromBatch, machineStartIndex, stepSize, numDesiredLeaves uint64) containers.PromiseInterface[[]common.Hash] { + return stopwaiter.LaunchPromiseThread[[]common.Hash](e, func(ctx context.Context) ([]common.Hash, error) { + machine, err := e.cache.GetMachineAt(ctx, machineStartIndex) + if err != nil { + return nil, err + } + log.Debug(fmt.Sprintf("Advanced machine to index %d, beginning hash computation", machineStartIndex)) + // If the machine is starting at index 0, we always want to start at the "Machine finished" global state status + // to align with the machine hashes that the inbox machine will produce. + var machineHashes []common.Hash + + if machineStartIndex == 0 { + gs := machine.GetGlobalState() + log.Debug(fmt.Sprintf("Start global state for machine index 0: %+v", gs), "fromBatch", fromBatch) + machineHashes = append(machineHashes, machineFinishedHash(gs)) + } else { + // Otherwise, we simply append the machine hash at the specified start index. + machineHashes = append(machineHashes, machine.Hash()) + } + startHash := machineHashes[0] + + // If we only want 1 hash, we can return early. + if numDesiredLeaves == 1 { + return machineHashes, nil + } + + logInterval := numDesiredLeaves / 20 // Log every 5% progress + if logInterval == 0 { + logInterval = 1 + } + + start := time.Now() + for numIterations := uint64(0); numIterations < numDesiredLeaves; numIterations++ { + // The absolute opcode position the machine should be in after stepping. + position := machineStartIndex + stepSize*(numIterations+1) + + // Advance the machine in step size increments. 
+ if err := machine.Step(ctx, stepSize); err != nil { + return nil, fmt.Errorf("failed to step machine to position %d: %w", position, err) + } + if numIterations%logInterval == 0 || numIterations == numDesiredLeaves-1 { + progressPercent := (float64(numIterations+1) / float64(numDesiredLeaves)) * 100 + log.Info( + fmt.Sprintf( + "Computing subchallenge progress: %.2f%% - %d of %d hashes needed", + progressPercent, + numIterations+1, + numDesiredLeaves, + ), + "fromBatch", fromBatch, + "machinePosition", numIterations*stepSize+machineStartIndex, + "timeSinceStart", time.Since(start), + "stepSize", stepSize, + "startHash", startHash, + "machineStartIndex", machineStartIndex, + "numDesiredLeaves", numDesiredLeaves, + ) + } + machineHashes = append(machineHashes, machine.Hash()) + if len(machineHashes) == int(numDesiredLeaves) { + break + } + } + log.Info( + "Successfully finished computing the data needed for opening a subchallenge", + "fromBatch", fromBatch, + "stepSize", stepSize, + "startHash", startHash, + "machineStartIndex", machineStartIndex, + "numDesiredLeaves", numDesiredLeaves, + "finishedHash", machineHashes[len(machineHashes)-1], + "finishedGlobalState", fmt.Sprintf("%+v", machine.GetGlobalState()), + ) + return machineHashes, nil + }) +} + func (e *executionRun) GetProofAt(position uint64) containers.PromiseInterface[[]byte] { return stopwaiter.LaunchPromiseThread[[]byte](e, func(ctx context.Context) ([]byte, error) { machine, err := e.cache.GetMachineAt(ctx, position) diff --git a/validator/valnode/validation_api.go b/validator/valnode/validation_api.go index a67299b1a1..075ab44397 100644 --- a/validator/valnode/validation_api.go +++ b/validator/valnode/validation_api.go @@ -148,6 +148,19 @@ func (a *ExecServerAPI) GetStepAt(ctx context.Context, execid uint64, position u return server_api.MachineStepResultToJson(res), nil } +func (a *ExecServerAPI) GetMachineHashesWithStepSize(ctx context.Context, execid, fromBatch, fromStep, stepSize, numDesiredLeaves uint64) ([]common.Hash, error) { + run, err := a.getRun(execid) + if err != nil { + return nil, err + } + leavesInRange := run.GetMachineHashesWithStepSize(fromBatch, fromStep, stepSize, numDesiredLeaves) + res, err := leavesInRange.Await(ctx) + if err != nil { + return nil, err + } + return res, nil +} + func (a *ExecServerAPI) GetProofAt(ctx context.Context, execid uint64, position uint64) (string, error) { run, err := a.getRun(execid) if err != nil { From 09c8b034d4add4386024de02597594fa71fced2f Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Thu, 13 Jun 2024 16:51:18 -0500 Subject: [PATCH 10/72] tests --- validator/server_arb/execution_run.go | 246 +++++++++++++-------- validator/server_arb/execution_run_test.go | 167 ++++++++++++++ 2 files changed, 317 insertions(+), 96 deletions(-) create mode 100644 validator/server_arb/execution_run_test.go diff --git a/validator/server_arb/execution_run.go b/validator/server_arb/execution_run.go index 8893d0b3da..059957fedf 100644 --- a/validator/server_arb/execution_run.go +++ b/validator/server_arb/execution_run.go @@ -10,6 +10,8 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/log" "github.com/offchainlabs/nitro/util/containers" "github.com/offchainlabs/nitro/util/stopwaiter" @@ -18,8 +20,10 @@ import ( type executionRun struct { stopwaiter.StopWaiter - cache *MachineCache - close sync.Once + cache *MachineCache + initialMachineGetter func(context.Context) (MachineInterface, error) + config 
*MachineCacheConfig + close sync.Once } // NewExecutionChallengeBackend creates a backend with the given arguments. @@ -31,6 +35,8 @@ func NewExecutionRun( ) (*executionRun, error) { exec := &executionRun{} exec.Start(ctxIn, exec) + exec.initialMachineGetter = initialMachineGetter + exec.config = config exec.cache = NewMachineCache(exec.GetContext(), initialMachineGetter, config) return exec, nil } @@ -53,110 +59,150 @@ func (e *executionRun) PrepareRange(start uint64, end uint64) containers.Promise func (e *executionRun) GetStepAt(position uint64) containers.PromiseInterface[*validator.MachineStepResult] { return stopwaiter.LaunchPromiseThread[*validator.MachineStepResult](e, func(ctx context.Context) (*validator.MachineStepResult, error) { - var machine MachineInterface - var err error - if position == ^uint64(0) { - machine, err = e.cache.GetFinalMachine(ctx) - } else { - // todo cache last machine - machine, err = e.cache.GetMachineAt(ctx, position) - } - if err != nil { - return nil, err - } - machineStep := machine.GetStepCount() - if position != machineStep { - machineRunning := machine.IsRunning() - if machineRunning || machineStep > position { - return nil, fmt.Errorf("machine is in wrong position want: %d, got: %d", position, machine.GetStepCount()) - } - - } - result := &validator.MachineStepResult{ - Position: machineStep, - Status: validator.MachineStatus(machine.Status()), - GlobalState: machine.GetGlobalState(), - Hash: machine.Hash(), - } - return result, nil + return e.intermediateGetStepAt(ctx, position) }) } func (e *executionRun) GetMachineHashesWithStepSize(fromBatch, machineStartIndex, stepSize, numDesiredLeaves uint64) containers.PromiseInterface[[]common.Hash] { - return stopwaiter.LaunchPromiseThread[[]common.Hash](e, func(ctx context.Context) ([]common.Hash, error) { - machine, err := e.cache.GetMachineAt(ctx, machineStartIndex) - if err != nil { - return nil, err + return stopwaiter.LaunchPromiseThread(e, func(ctx context.Context) ([]common.Hash, error) { + return e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ + startIndex: machineStartIndex, + fromBatch: fromBatch, + stepSize: stepSize, + requiredNumHashes: numDesiredLeaves, + }) + }) +} + +type GlobalStateGetter interface { + GetGlobalState() validator.GoGlobalState + HashStepper +} + +type HashStepper interface { + Step(ctx context.Context, stepCount uint64) error + Hash() common.Hash +} + +type machineHashesWithStepSizeArgs struct { + startIndex uint64 + fromBatch uint64 + stepSize uint64 + requiredNumHashes uint64 + getMachineAtIndex func(context.Context, uint64) (GlobalStateGetter, error) +} + +func (e *executionRun) machineHashesWithStepSize( + ctx context.Context, + args machineHashesWithStepSizeArgs, +) ([]common.Hash, error) { + if args.stepSize == 0 { + return nil, fmt.Errorf("step size cannot be 0") + } + if args.requiredNumHashes == 0 { + return nil, fmt.Errorf("required number of hashes cannot be 0") + } + machine, err := args.getMachineAtIndex(ctx, args.startIndex) + if err != nil { + return nil, err + } + log.Debug(fmt.Sprintf("Advanced machine to index %d, beginning hash computation", args.startIndex)) + + // If the machine is starting at index 0, we always want to start at the "Machine finished" global state status + // to align with the machine hashes that the inbox machine will produce. 
+ var machineHashes []common.Hash + if args.startIndex == 0 { + gs := machine.GetGlobalState() + log.Debug(fmt.Sprintf("Start global state for machine index 0: %+v", gs), "fromBatch", args.fromBatch) + machineHashes = append(machineHashes, machineFinishedHash(gs)) + } else { + // Otherwise, we simply append the machine hash at the specified start index. + machineHashes = append(machineHashes, machine.Hash()) + } + startHash := machineHashes[0] + + // If we only want 1 hash, we can return early. + if args.requiredNumHashes == 1 { + return machineHashes, nil + } + + logInterval := args.requiredNumHashes / 20 // Log every 5% progress + if logInterval == 0 { + logInterval = 1 + } + + start := time.Now() + for numIterations := uint64(0); numIterations < args.requiredNumHashes; numIterations++ { + // The absolute program counter the machine should be in after stepping. + absoluteMachineIndex := args.startIndex + args.stepSize*(numIterations+1) + + // Advance the machine in step size increments. + if err := machine.Step(ctx, args.stepSize); err != nil { + return nil, fmt.Errorf("failed to step machine to position %d: %w", absoluteMachineIndex, err) } - log.Debug(fmt.Sprintf("Advanced machine to index %d, beginning hash computation", machineStartIndex)) - // If the machine is starting at index 0, we always want to start at the "Machine finished" global state status - // to align with the machine hashes that the inbox machine will produce. - var machineHashes []common.Hash - - if machineStartIndex == 0 { - gs := machine.GetGlobalState() - log.Debug(fmt.Sprintf("Start global state for machine index 0: %+v", gs), "fromBatch", fromBatch) - machineHashes = append(machineHashes, machineFinishedHash(gs)) - } else { - // Otherwise, we simply append the machine hash at the specified start index. - machineHashes = append(machineHashes, machine.Hash()) + if numIterations%logInterval == 0 || numIterations == args.requiredNumHashes-1 { + progressPercent := (float64(numIterations+1) / float64(args.requiredNumHashes)) * 100 + log.Info( + fmt.Sprintf( + "Computing BOLD subchallenge progress: %.2f%% - %d of %d hashes needed", + progressPercent, + numIterations+1, + args.requiredNumHashes, + ), + "fromBatch", args.fromBatch, + "machinePosition", numIterations*args.stepSize+args.startIndex, + "timeSinceStart", time.Since(start), + "stepSize", args.stepSize, + "startHash", startHash, + "machineStartIndex", args.startIndex, + "numDesiredLeaves", args.requiredNumHashes, + ) } - startHash := machineHashes[0] - - // If we only want 1 hash, we can return early. 
- if numDesiredLeaves == 1 { - return machineHashes, nil + machineHashes = append(machineHashes, machine.Hash()) + if uint64(len(machineHashes)) == args.requiredNumHashes { + break } + } + log.Info( + "Successfully finished computing the data needed for opening a subchallenge", + "fromBatch", args.fromBatch, + "stepSize", args.stepSize, + "startHash", startHash, + "machineStartIndex", args.startIndex, + "numDesiredLeaves", args.requiredNumHashes, + "finishedHash", machineHashes[len(machineHashes)-1], + "finishedGlobalState", fmt.Sprintf("%+v", machine.GetGlobalState()), + ) + return machineHashes, nil +} - logInterval := numDesiredLeaves / 20 // Log every 5% progress - if logInterval == 0 { - logInterval = 1 +func (e *executionRun) intermediateGetStepAt(ctx context.Context, position uint64) (*validator.MachineStepResult, error) { + var machine MachineInterface + var err error + if position == ^uint64(0) { + machine, err = e.cache.GetFinalMachine(ctx) + } else { + // TODO(rauljordan): Cache last machine. + machine, err = e.cache.GetMachineAt(ctx, position) + } + if err != nil { + return nil, err + } + machineStep := machine.GetStepCount() + if position != machineStep { + machineRunning := machine.IsRunning() + if machineRunning || machineStep > position { + return nil, fmt.Errorf("machine is in wrong position want: %d, got: %d", position, machine.GetStepCount()) } - start := time.Now() - for numIterations := uint64(0); numIterations < numDesiredLeaves; numIterations++ { - // The absolute opcode position the machine should be in after stepping. - position := machineStartIndex + stepSize*(numIterations+1) - - // Advance the machine in step size increments. - if err := machine.Step(ctx, stepSize); err != nil { - return nil, fmt.Errorf("failed to step machine to position %d: %w", position, err) - } - if numIterations%logInterval == 0 || numIterations == numDesiredLeaves-1 { - progressPercent := (float64(numIterations+1) / float64(numDesiredLeaves)) * 100 - log.Info( - fmt.Sprintf( - "Computing subchallenge progress: %.2f%% - %d of %d hashes needed", - progressPercent, - numIterations+1, - numDesiredLeaves, - ), - "fromBatch", fromBatch, - "machinePosition", numIterations*stepSize+machineStartIndex, - "timeSinceStart", time.Since(start), - "stepSize", stepSize, - "startHash", startHash, - "machineStartIndex", machineStartIndex, - "numDesiredLeaves", numDesiredLeaves, - ) - } - machineHashes = append(machineHashes, machine.Hash()) - if len(machineHashes) == int(numDesiredLeaves) { - break - } - } - log.Info( - "Successfully finished computing the data needed for opening a subchallenge", - "fromBatch", fromBatch, - "stepSize", stepSize, - "startHash", startHash, - "machineStartIndex", machineStartIndex, - "numDesiredLeaves", numDesiredLeaves, - "finishedHash", machineHashes[len(machineHashes)-1], - "finishedGlobalState", fmt.Sprintf("%+v", machine.GetGlobalState()), - ) - return machineHashes, nil - }) + } + result := &validator.MachineStepResult{ + Position: machineStep, + Status: validator.MachineStatus(machine.Status()), + GlobalState: machine.GetGlobalState(), + Hash: machine.Hash(), + } + return result, nil } func (e *executionRun) GetProofAt(position uint64) containers.PromiseInterface[[]byte] { @@ -172,3 +218,11 @@ func (e *executionRun) GetProofAt(position uint64) containers.PromiseInterface[[ func (e *executionRun) GetLastStep() containers.PromiseInterface[*validator.MachineStepResult] { return e.GetStepAt(^uint64(0)) } + +func (e *executionRun) CheckAlive(ctx context.Context) error { + 
return nil +} + +func machineFinishedHash(gs validator.GoGlobalState) common.Hash { + return crypto.Keccak256Hash([]byte("Machine finished:"), gs.Hash().Bytes()) +} diff --git a/validator/server_arb/execution_run_test.go b/validator/server_arb/execution_run_test.go new file mode 100644 index 0000000000..c1f0e1ef50 --- /dev/null +++ b/validator/server_arb/execution_run_test.go @@ -0,0 +1,167 @@ +package server_arb + +import ( + "context" + "strings" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/offchainlabs/nitro/validator" +) + +var _ GlobalStateGetter = (*mockMachine)(nil) + +type mockMachine struct { + gs validator.GoGlobalState + totalSteps uint64 +} + +func (m *mockMachine) Hash() common.Hash { + if m.gs.PosInBatch == m.totalSteps-1 { + return machineFinishedHash(m.gs) + } + return m.gs.Hash() +} + +func (m *mockMachine) GetGlobalState() validator.GoGlobalState { + return m.gs +} + +func (m *mockMachine) Step(ctx context.Context, stepSize uint64) error { + for i := uint64(0); i < stepSize; i++ { + if m.gs.PosInBatch == m.totalSteps-1 { + return nil + } + m.gs.PosInBatch += 1 + } + return nil +} + +func Test_machineHashesWithStep(t *testing.T) { + mm := &mockMachine{} + e := &executionRun{} + ctx := context.Background() + + machGetter := func(ctx context.Context, index uint64) (GlobalStateGetter, error) { + return mm, nil + } + t.Run("basic argument checks", func(t *testing.T) { + _, err := e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ + stepSize: 0, + }) + if !strings.Contains(err.Error(), "step size cannot be 0") { + t.Fatal("Wrong error") + } + _, err = e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ + stepSize: 1, + requiredNumHashes: 0, + }) + if !strings.Contains(err.Error(), "required number of hashes cannot be 0") { + t.Fatal("Wrong error") + } + }) + t.Run("machine at start index 0 hash is the finished state hash", func(t *testing.T) { + mm.gs = validator.GoGlobalState{ + Batch: 1, + } + hashes, err := e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ + fromBatch: 0, + stepSize: 1, + requiredNumHashes: 1, + startIndex: 0, + getMachineAtIndex: machGetter, + }) + if err != nil { + t.Fatal(err) + } + expected := machineFinishedHash(mm.gs) + if len(hashes) != 1 { + t.Fatal("Wanted one hash") + } + if expected != hashes[0] { + t.Fatalf("Wanted %#x, got %#x", expected, hashes[0]) + } + }) + t.Run("can step in step size increments and collect hashes", func(t *testing.T) { + initialGs := validator.GoGlobalState{ + Batch: 1, + PosInBatch: 0, + } + mm.gs = initialGs + mm.totalSteps = 20 + stepSize := uint64(5) + hashes, err := e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ + fromBatch: 1, + stepSize: stepSize, + requiredNumHashes: 4, + startIndex: 0, + getMachineAtIndex: machGetter, + }) + if err != nil { + t.Fatal(err) + } + expectedHashes := make([]common.Hash, 0) + for i := uint64(0); i < 4; i++ { + if i == 0 { + expectedHashes = append(expectedHashes, machineFinishedHash(initialGs)) + continue + } + gs := validator.GoGlobalState{ + Batch: 1, + PosInBatch: uint64(i * stepSize), + } + expectedHashes = append(expectedHashes, gs.Hash()) + } + if len(hashes) != len(expectedHashes) { + t.Fatal("Wanted one hash") + } + for i := range hashes { + if expectedHashes[i] != hashes[i] { + t.Fatalf("Wanted at index %d, %#x, got %#x", i, expectedHashes[i], hashes[i]) + } + } + }) + t.Run("if finishes execution early, simply pads the remaining desired hashes with the machine finished hash", func(t *testing.T) { 
+ initialGs := validator.GoGlobalState{ + Batch: 1, + PosInBatch: 0, + } + mm.gs = initialGs + mm.totalSteps = 20 + stepSize := uint64(5) + hashes, err := e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ + fromBatch: 1, + stepSize: stepSize, + requiredNumHashes: 10, + startIndex: 0, + getMachineAtIndex: machGetter, + }) + if err != nil { + t.Fatal(err) + } + expectedHashes := make([]common.Hash, 0) + for i := uint64(0); i < 4; i++ { + if i == 0 { + expectedHashes = append(expectedHashes, machineFinishedHash(initialGs)) + continue + } + gs := validator.GoGlobalState{ + Batch: 1, + PosInBatch: uint64(i * stepSize), + } + expectedHashes = append(expectedHashes, gs.Hash()) + } + // The rest of the expected hashes should be the machine finished hash repeated. + for i := uint64(4); i < 10; i++ { + expectedHashes = append(expectedHashes, machineFinishedHash(mm.gs)) + } + if len(hashes) != len(expectedHashes) { + t.Fatal("Wanted one hash") + } + for i := range hashes { + if expectedHashes[i] != hashes[i] { + t.Fatalf("Wanted at index %d, %#x, got %#x", i, expectedHashes[i], hashes[i]) + } + } + }) +} From f9470c03bd2e9d8be1ca78f37618f5fda27ad719 Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Thu, 13 Jun 2024 16:55:40 -0500 Subject: [PATCH 11/72] rem old --- validator/server_arb/execution_run.go | 68 +++++++++++---------------- 1 file changed, 28 insertions(+), 40 deletions(-) diff --git a/validator/server_arb/execution_run.go b/validator/server_arb/execution_run.go index 059957fedf..c3f78e8963 100644 --- a/validator/server_arb/execution_run.go +++ b/validator/server_arb/execution_run.go @@ -20,10 +20,8 @@ import ( type executionRun struct { stopwaiter.StopWaiter - cache *MachineCache - initialMachineGetter func(context.Context) (MachineInterface, error) - config *MachineCacheConfig - close sync.Once + cache *MachineCache + close sync.Once } // NewExecutionChallengeBackend creates a backend with the given arguments. @@ -35,8 +33,6 @@ func NewExecutionRun( ) (*executionRun, error) { exec := &executionRun{} exec.Start(ctxIn, exec) - exec.initialMachineGetter = initialMachineGetter - exec.config = config exec.cache = NewMachineCache(exec.GetContext(), initialMachineGetter, config) return exec, nil } @@ -59,7 +55,32 @@ func (e *executionRun) PrepareRange(start uint64, end uint64) containers.Promise func (e *executionRun) GetStepAt(position uint64) containers.PromiseInterface[*validator.MachineStepResult] { return stopwaiter.LaunchPromiseThread[*validator.MachineStepResult](e, func(ctx context.Context) (*validator.MachineStepResult, error) { - return e.intermediateGetStepAt(ctx, position) + var machine MachineInterface + var err error + if position == ^uint64(0) { + machine, err = e.cache.GetFinalMachine(ctx) + } else { + // TODO(rauljordan): Cache last machine. 
+ machine, err = e.cache.GetMachineAt(ctx, position) + } + if err != nil { + return nil, err + } + machineStep := machine.GetStepCount() + if position != machineStep { + machineRunning := machine.IsRunning() + if machineRunning || machineStep > position { + return nil, fmt.Errorf("machine is in wrong position want: %d, got: %d", position, machine.GetStepCount()) + } + + } + result := &validator.MachineStepResult{ + Position: machineStep, + Status: validator.MachineStatus(machine.Status()), + GlobalState: machine.GetGlobalState(), + Hash: machine.Hash(), + } + return result, nil }) } @@ -176,35 +197,6 @@ func (e *executionRun) machineHashesWithStepSize( return machineHashes, nil } -func (e *executionRun) intermediateGetStepAt(ctx context.Context, position uint64) (*validator.MachineStepResult, error) { - var machine MachineInterface - var err error - if position == ^uint64(0) { - machine, err = e.cache.GetFinalMachine(ctx) - } else { - // TODO(rauljordan): Cache last machine. - machine, err = e.cache.GetMachineAt(ctx, position) - } - if err != nil { - return nil, err - } - machineStep := machine.GetStepCount() - if position != machineStep { - machineRunning := machine.IsRunning() - if machineRunning || machineStep > position { - return nil, fmt.Errorf("machine is in wrong position want: %d, got: %d", position, machine.GetStepCount()) - } - - } - result := &validator.MachineStepResult{ - Position: machineStep, - Status: validator.MachineStatus(machine.Status()), - GlobalState: machine.GetGlobalState(), - Hash: machine.Hash(), - } - return result, nil -} - func (e *executionRun) GetProofAt(position uint64) containers.PromiseInterface[[]byte] { return stopwaiter.LaunchPromiseThread[[]byte](e, func(ctx context.Context) ([]byte, error) { machine, err := e.cache.GetMachineAt(ctx, position) @@ -219,10 +211,6 @@ func (e *executionRun) GetLastStep() containers.PromiseInterface[*validator.Mach return e.GetStepAt(^uint64(0)) } -func (e *executionRun) CheckAlive(ctx context.Context) error { - return nil -} - func machineFinishedHash(gs validator.GoGlobalState) common.Hash { return crypto.Keccak256Hash([]byte("Machine finished:"), gs.Hash().Bytes()) } From 18a2510b23d8832d5dc3f9cca649bf20373fd5cb Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Thu, 13 Jun 2024 16:59:14 -0500 Subject: [PATCH 12/72] done --- system_tests/validation_mock_test.go | 1 - validator/server_arb/execution_run.go | 1 - 2 files changed, 2 deletions(-) diff --git a/system_tests/validation_mock_test.go b/system_tests/validation_mock_test.go index 38beee021b..0731c6564b 100644 --- a/system_tests/validation_mock_test.go +++ b/system_tests/validation_mock_test.go @@ -127,7 +127,6 @@ func (r *mockExecRun) GetStepAt(position uint64) containers.PromiseInterface[*va } func (r *mockExecRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, numDesiredLeaves, fromBatch uint64) containers.PromiseInterface[[]common.Hash] { - // TODO: Add mock implementation for GetMachineHashesWithStepSize return containers.NewReadyPromise[[]common.Hash](nil, nil) } diff --git a/validator/server_arb/execution_run.go b/validator/server_arb/execution_run.go index c3f78e8963..de60d987f2 100644 --- a/validator/server_arb/execution_run.go +++ b/validator/server_arb/execution_run.go @@ -60,7 +60,6 @@ func (e *executionRun) GetStepAt(position uint64) containers.PromiseInterface[*v if position == ^uint64(0) { machine, err = e.cache.GetFinalMachine(ctx) } else { - // TODO(rauljordan): Cache last machine. 
machine, err = e.cache.GetMachineAt(ctx, position) } if err != nil { From 71c0a3d57697a03ad004b27e7559ecc2b8cf5a9e Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 17 Jun 2024 11:03:02 -0500 Subject: [PATCH 13/72] tsahi feedback --- system_tests/validation_mock_test.go | 2 +- validator/client/validation_client.go | 4 +- validator/interface.go | 2 +- validator/server_arb/execution_run.go | 77 ++++++---------- validator/server_arb/execution_run_test.go | 101 ++++++++++++++------- validator/server_arb/machine_cache.go | 1 + validator/valnode/validation_api.go | 4 +- 7 files changed, 102 insertions(+), 89 deletions(-) diff --git a/system_tests/validation_mock_test.go b/system_tests/validation_mock_test.go index 0731c6564b..d80a2041ec 100644 --- a/system_tests/validation_mock_test.go +++ b/system_tests/validation_mock_test.go @@ -126,7 +126,7 @@ func (r *mockExecRun) GetStepAt(position uint64) containers.PromiseInterface[*va }, nil) } -func (r *mockExecRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, numDesiredLeaves, fromBatch uint64) containers.PromiseInterface[[]common.Hash] { +func (r *mockExecRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, numRequiredHashes uint64) containers.PromiseInterface[[]common.Hash] { return containers.NewReadyPromise[[]common.Hash](nil, nil) } diff --git a/validator/client/validation_client.go b/validator/client/validation_client.go index c454cefa3c..0f40ef0387 100644 --- a/validator/client/validation_client.go +++ b/validator/client/validation_client.go @@ -197,10 +197,10 @@ func (r *ExecutionClientRun) GetStepAt(pos uint64) containers.PromiseInterface[* }) } -func (r *ExecutionClientRun) GetMachineHashesWithStepSize(fromBatch, machineStartIndex, stepSize, numDesiredLeaves uint64) containers.PromiseInterface[[]common.Hash] { +func (r *ExecutionClientRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, numRequiredHashes uint64) containers.PromiseInterface[[]common.Hash] { return stopwaiter.LaunchPromiseThread[[]common.Hash](r, func(ctx context.Context) ([]common.Hash, error) { var resJson []common.Hash - err := r.client.client.CallContext(ctx, &resJson, server_api.Namespace+"_getMachineHashesWithStepSize", r.id, fromBatch, machineStartIndex, stepSize, numDesiredLeaves) + err := r.client.client.CallContext(ctx, &resJson, server_api.Namespace+"_getMachineHashesWithStepSize", r.id, machineStartIndex, stepSize, numRequiredHashes) if err != nil { return nil, err } diff --git a/validator/interface.go b/validator/interface.go index 1ae0b2ac69..58ad841ff5 100644 --- a/validator/interface.go +++ b/validator/interface.go @@ -30,7 +30,7 @@ type ExecutionSpawner interface { type ExecutionRun interface { GetStepAt(uint64) containers.PromiseInterface[*MachineStepResult] - GetMachineHashesWithStepSize(fromBatch, machineStartIndex, stepSize, numDesiredLeaves uint64) containers.PromiseInterface[[]common.Hash] + GetMachineHashesWithStepSize(machineStartIndex, stepSize, numRequiredHashes uint64) containers.PromiseInterface[[]common.Hash] GetLastStep() containers.PromiseInterface[*MachineStepResult] GetProofAt(uint64) containers.PromiseInterface[[]byte] PrepareRange(uint64, uint64) containers.PromiseInterface[struct{}] diff --git a/validator/server_arb/execution_run.go b/validator/server_arb/execution_run.go index de60d987f2..1d765688d5 100644 --- a/validator/server_arb/execution_run.go +++ b/validator/server_arb/execution_run.go @@ -83,57 +83,36 @@ func (e *executionRun) GetStepAt(position uint64) containers.PromiseInterface[*v }) } -func 
(e *executionRun) GetMachineHashesWithStepSize(fromBatch, machineStartIndex, stepSize, numDesiredLeaves uint64) containers.PromiseInterface[[]common.Hash] { +func (e *executionRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, requiredNumHashes uint64) containers.PromiseInterface[[]common.Hash] { return stopwaiter.LaunchPromiseThread(e, func(ctx context.Context) ([]common.Hash, error) { - return e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ - startIndex: machineStartIndex, - fromBatch: fromBatch, - stepSize: stepSize, - requiredNumHashes: numDesiredLeaves, - }) + return e.machineHashesWithStepSize(ctx, machineStartIndex, stepSize, requiredNumHashes) }) } -type GlobalStateGetter interface { - GetGlobalState() validator.GoGlobalState - HashStepper -} - -type HashStepper interface { - Step(ctx context.Context, stepCount uint64) error - Hash() common.Hash -} - -type machineHashesWithStepSizeArgs struct { - startIndex uint64 - fromBatch uint64 - stepSize uint64 - requiredNumHashes uint64 - getMachineAtIndex func(context.Context, uint64) (GlobalStateGetter, error) -} - func (e *executionRun) machineHashesWithStepSize( ctx context.Context, - args machineHashesWithStepSizeArgs, + machineStartIndex, + stepSize, + requiredNumHashes uint64, ) ([]common.Hash, error) { - if args.stepSize == 0 { + if stepSize == 0 { return nil, fmt.Errorf("step size cannot be 0") } - if args.requiredNumHashes == 0 { + if requiredNumHashes == 0 { return nil, fmt.Errorf("required number of hashes cannot be 0") } - machine, err := args.getMachineAtIndex(ctx, args.startIndex) + machine, err := e.cache.GetMachineAt(ctx, machineStartIndex) if err != nil { return nil, err } - log.Debug(fmt.Sprintf("Advanced machine to index %d, beginning hash computation", args.startIndex)) + log.Debug(fmt.Sprintf("Advanced machine to index %d, beginning hash computation", machineStartIndex)) // If the machine is starting at index 0, we always want to start at the "Machine finished" global state status // to align with the machine hashes that the inbox machine will produce. var machineHashes []common.Hash - if args.startIndex == 0 { + if machineStartIndex == 0 { gs := machine.GetGlobalState() - log.Debug(fmt.Sprintf("Start global state for machine index 0: %+v", gs), "fromBatch", args.fromBatch) + log.Debug(fmt.Sprintf("Start global state for machine index 0: %+v", gs)) machineHashes = append(machineHashes, machineFinishedHash(gs)) } else { // Otherwise, we simply append the machine hash at the specified start index. @@ -142,54 +121,52 @@ func (e *executionRun) machineHashesWithStepSize( startHash := machineHashes[0] // If we only want 1 hash, we can return early. - if args.requiredNumHashes == 1 { + if requiredNumHashes == 1 { return machineHashes, nil } - logInterval := args.requiredNumHashes / 20 // Log every 5% progress + logInterval := requiredNumHashes / 20 // Log every 5% progress if logInterval == 0 { logInterval = 1 } start := time.Now() - for numIterations := uint64(0); numIterations < args.requiredNumHashes; numIterations++ { + for numIterations := uint64(0); numIterations < requiredNumHashes; numIterations++ { // The absolute program counter the machine should be in after stepping. - absoluteMachineIndex := args.startIndex + args.stepSize*(numIterations+1) + absoluteMachineIndex := machineStartIndex + stepSize*(numIterations+1) // Advance the machine in step size increments. 
- if err := machine.Step(ctx, args.stepSize); err != nil { + if err := machine.Step(ctx, stepSize); err != nil { return nil, fmt.Errorf("failed to step machine to position %d: %w", absoluteMachineIndex, err) } - if numIterations%logInterval == 0 || numIterations == args.requiredNumHashes-1 { - progressPercent := (float64(numIterations+1) / float64(args.requiredNumHashes)) * 100 + if numIterations%logInterval == 0 || numIterations == requiredNumHashes-1 { + progressPercent := (float64(numIterations+1) / float64(requiredNumHashes)) * 100 log.Info( fmt.Sprintf( "Computing BOLD subchallenge progress: %.2f%% - %d of %d hashes needed", progressPercent, numIterations+1, - args.requiredNumHashes, + requiredNumHashes, ), - "fromBatch", args.fromBatch, - "machinePosition", numIterations*args.stepSize+args.startIndex, + "machinePosition", numIterations*stepSize+machineStartIndex, "timeSinceStart", time.Since(start), - "stepSize", args.stepSize, + "stepSize", stepSize, "startHash", startHash, - "machineStartIndex", args.startIndex, - "numDesiredLeaves", args.requiredNumHashes, + "machineStartIndex", machine, + "numDesiredLeaves", requiredNumHashes, ) } machineHashes = append(machineHashes, machine.Hash()) - if uint64(len(machineHashes)) == args.requiredNumHashes { + if uint64(len(machineHashes)) == requiredNumHashes { break } } log.Info( "Successfully finished computing the data needed for opening a subchallenge", - "fromBatch", args.fromBatch, - "stepSize", args.stepSize, + "stepSize", stepSize, "startHash", startHash, - "machineStartIndex", args.startIndex, - "numDesiredLeaves", args.requiredNumHashes, + "machineStartIndex", machineStartIndex, + "numDesiredLeaves", requiredNumHashes, "finishedHash", machineHashes[len(machineHashes)-1], "finishedGlobalState", fmt.Sprintf("%+v", machine.GetGlobalState()), ) diff --git a/validator/server_arb/execution_run_test.go b/validator/server_arb/execution_run_test.go index c1f0e1ef50..24e401f351 100644 --- a/validator/server_arb/execution_run_test.go +++ b/validator/server_arb/execution_run_test.go @@ -4,13 +4,12 @@ import ( "context" "strings" "testing" + "time" "github.com/ethereum/go-ethereum/common" "github.com/offchainlabs/nitro/validator" ) -var _ GlobalStateGetter = (*mockMachine)(nil) - type mockMachine struct { gs validator.GoGlobalState totalSteps uint64 @@ -37,25 +36,45 @@ func (m *mockMachine) Step(ctx context.Context, stepSize uint64) error { return nil } +func (m *mockMachine) CloneMachineInterface() MachineInterface { + return m +} +func (m *mockMachine) GetStepCount() uint64 { + return m.totalSteps +} +func (m *mockMachine) IsRunning() bool { + return m.gs.PosInBatch < m.totalSteps-1 +} +func (m *mockMachine) ValidForStep(uint64) bool { + return true +} +func (m *mockMachine) Status() uint8 { + if m.gs.PosInBatch == m.totalSteps-1 { + return uint8(validator.MachineStatusFinished) + } + return uint8(validator.MachineStatusRunning) +} +func (m *mockMachine) ProveNextStep() []byte { + return nil +} +func (m *mockMachine) Freeze() {} +func (m *mockMachine) Destroy() {} + func Test_machineHashesWithStep(t *testing.T) { mm := &mockMachine{} e := &executionRun{} ctx := context.Background() - machGetter := func(ctx context.Context, index uint64) (GlobalStateGetter, error) { - return mm, nil - } t.Run("basic argument checks", func(t *testing.T) { - _, err := e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ - stepSize: 0, - }) + machStartIndex := uint64(0) + stepSize := uint64(0) + numRequiredHashes := uint64(0) + _, err := 
e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) if !strings.Contains(err.Error(), "step size cannot be 0") { t.Fatal("Wrong error") } - _, err = e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ - stepSize: 1, - requiredNumHashes: 0, - }) + stepSize = uint64(1) + _, err = e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) if !strings.Contains(err.Error(), "required number of hashes cannot be 0") { t.Fatal("Wrong error") } @@ -64,13 +83,19 @@ func Test_machineHashesWithStep(t *testing.T) { mm.gs = validator.GoGlobalState{ Batch: 1, } - hashes, err := e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ - fromBatch: 0, - stepSize: 1, - requiredNumHashes: 1, - startIndex: 0, - getMachineAtIndex: machGetter, - }) + machStartIndex := uint64(0) + stepSize := uint64(1) + numRequiredHashes := uint64(1) + e.cache = &MachineCache{ + buildingLock: make(chan struct{}, 1), + machines: []MachineInterface{mm}, + finalMachine: mm, + } + go func() { + <-time.After(time.Millisecond * 50) + e.cache.buildingLock <- struct{}{} + }() + hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) if err != nil { t.Fatal(err) } @@ -89,14 +114,19 @@ func Test_machineHashesWithStep(t *testing.T) { } mm.gs = initialGs mm.totalSteps = 20 + machStartIndex := uint64(0) stepSize := uint64(5) - hashes, err := e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ - fromBatch: 1, - stepSize: stepSize, - requiredNumHashes: 4, - startIndex: 0, - getMachineAtIndex: machGetter, - }) + numRequiredHashes := uint64(4) + e.cache = &MachineCache{ + buildingLock: make(chan struct{}, 1), + machines: []MachineInterface{mm}, + finalMachine: mm, + } + go func() { + <-time.After(time.Millisecond * 50) + e.cache.buildingLock <- struct{}{} + }() + hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) if err != nil { t.Fatal(err) } @@ -128,14 +158,19 @@ func Test_machineHashesWithStep(t *testing.T) { } mm.gs = initialGs mm.totalSteps = 20 + machStartIndex := uint64(0) stepSize := uint64(5) - hashes, err := e.machineHashesWithStepSize(ctx, machineHashesWithStepSizeArgs{ - fromBatch: 1, - stepSize: stepSize, - requiredNumHashes: 10, - startIndex: 0, - getMachineAtIndex: machGetter, - }) + numRequiredHashes := uint64(10) + e.cache = &MachineCache{ + buildingLock: make(chan struct{}, 1), + machines: []MachineInterface{mm}, + finalMachine: mm, + } + go func() { + <-time.After(time.Millisecond * 50) + e.cache.buildingLock <- struct{}{} + }() + hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) if err != nil { t.Fatal(err) } diff --git a/validator/server_arb/machine_cache.go b/validator/server_arb/machine_cache.go index 23fcdef6d6..b656f1d0ee 100644 --- a/validator/server_arb/machine_cache.go +++ b/validator/server_arb/machine_cache.go @@ -124,6 +124,7 @@ func (c *MachineCache) lockBuild(ctx context.Context) error { return ctx.Err() case <-c.buildingLock: } + fmt.Println("Got past the lock") err := c.err if err != nil { c.buildingLock <- struct{}{} diff --git a/validator/valnode/validation_api.go b/validator/valnode/validation_api.go index 075ab44397..a2ca8cfc70 100644 --- a/validator/valnode/validation_api.go +++ b/validator/valnode/validation_api.go @@ -148,12 +148,12 @@ func (a *ExecServerAPI) GetStepAt(ctx context.Context, execid uint64, position u return server_api.MachineStepResultToJson(res), nil } -func (a *ExecServerAPI) 
GetMachineHashesWithStepSize(ctx context.Context, execid, fromBatch, fromStep, stepSize, numDesiredLeaves uint64) ([]common.Hash, error) { +func (a *ExecServerAPI) GetMachineHashesWithStepSize(ctx context.Context, execid, fromStep, stepSize, numRequiredHashes uint64) ([]common.Hash, error) { run, err := a.getRun(execid) if err != nil { return nil, err } - leavesInRange := run.GetMachineHashesWithStepSize(fromBatch, fromStep, stepSize, numDesiredLeaves) + leavesInRange := run.GetMachineHashesWithStepSize(fromStep, stepSize, numRequiredHashes) res, err := leavesInRange.Await(ctx) if err != nil { return nil, err From 65e1b573ebee8452b9850aa23948a15688106c66 Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 17 Jun 2024 11:21:02 -0500 Subject: [PATCH 14/72] tests passing --- validator/server_arb/execution_run_test.go | 14 ++++++++++---- validator/server_arb/machine_cache.go | 1 - 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/validator/server_arb/execution_run_test.go b/validator/server_arb/execution_run_test.go index 24e401f351..0f047db39e 100644 --- a/validator/server_arb/execution_run_test.go +++ b/validator/server_arb/execution_run_test.go @@ -37,10 +37,13 @@ func (m *mockMachine) Step(ctx context.Context, stepSize uint64) error { } func (m *mockMachine) CloneMachineInterface() MachineInterface { - return m + return &mockMachine{ + gs: validator.GoGlobalState{Batch: m.gs.Batch, PosInBatch: m.gs.PosInBatch}, + totalSteps: m.totalSteps, + } } func (m *mockMachine) GetStepCount() uint64 { - return m.totalSteps + return 0 } func (m *mockMachine) IsRunning() bool { return m.gs.PosInBatch < m.totalSteps-1 @@ -187,8 +190,11 @@ func Test_machineHashesWithStep(t *testing.T) { expectedHashes = append(expectedHashes, gs.Hash()) } // The rest of the expected hashes should be the machine finished hash repeated. 
- for i := uint64(4); i < 10; i++ { - expectedHashes = append(expectedHashes, machineFinishedHash(mm.gs)) + for len(expectedHashes) < 10 { + expectedHashes = append(expectedHashes, machineFinishedHash(validator.GoGlobalState{ + Batch: 1, + PosInBatch: mm.totalSteps - 1, + })) } if len(hashes) != len(expectedHashes) { t.Fatal("Wanted one hash") diff --git a/validator/server_arb/machine_cache.go b/validator/server_arb/machine_cache.go index b656f1d0ee..23fcdef6d6 100644 --- a/validator/server_arb/machine_cache.go +++ b/validator/server_arb/machine_cache.go @@ -124,7 +124,6 @@ func (c *MachineCache) lockBuild(ctx context.Context) error { return ctx.Err() case <-c.buildingLock: } - fmt.Println("Got past the lock") err := c.err if err != nil { c.buildingLock <- struct{}{} From f9484da8cf59cb54a9476018141e677f0ef1815a Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 17 Jun 2024 12:24:08 -0500 Subject: [PATCH 15/72] edit --- validator/server_arb/execution_run.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validator/server_arb/execution_run.go b/validator/server_arb/execution_run.go index 1d765688d5..50c9b5608e 100644 --- a/validator/server_arb/execution_run.go +++ b/validator/server_arb/execution_run.go @@ -152,7 +152,7 @@ func (e *executionRun) machineHashesWithStepSize( "timeSinceStart", time.Since(start), "stepSize", stepSize, "startHash", startHash, - "machineStartIndex", machine, + "machineStartIndex", machineStartIndex, "numDesiredLeaves", requiredNumHashes, ) } From bfa498dcc0707aab285ff17e1deb4dec0df94692 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Mon, 17 Jun 2024 17:41:28 -0700 Subject: [PATCH 16/72] Fix ExpirationPolicy now that there is expiry --- das/local_file_storage_service.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index de3a1379da..0e2a381cd5 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -213,6 +213,9 @@ func (s *LocalFileStorageService) Sync(ctx context.Context) error { } func (s *LocalFileStorageService) ExpirationPolicy(ctx context.Context) (daprovider.ExpirationPolicy, error) { + if s.config.EnableExpiry { + return daprovider.DiscardAfterDataTimeout, nil + } return daprovider.KeepForever, nil } From 5ccda16f22be321356cb9d8f04825640e40ff579 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Mon, 17 Jun 2024 19:15:25 -0700 Subject: [PATCH 17/72] Deprecate local-db-storage, add migration opt The local DB storage backend, based on badgerdb, is being deprecated due to its unfavorable memory performance when compacting. local-file-storage is now the recommended local storage instead. To migrate to it, use --data-availability.migrate-local-db-to-file-storage and enable the local-file-storage service. Migration must be enabled manually using this flag since the operator may need to set up a new storage directory and backing volume. In-process migration is only provided from local-db-storage to local-file-storage since it is the only other local storage backend we current provide. To migrate to other backends, start a new daserver instance and use the rest-aggregator.sync-to-storage options. In the case where both S3 and DB backends were being used, it is recommended to migrate to the file backend and retain S3 instead of just dropping the DB backend configuration, otherwise there would no longer be a local store. 
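As an illustrative sketch only (not part of this change), the programmatic
equivalent of enabling the migration is roughly the following; the data-dir
paths are placeholders and the daserver wiring that consumes the config is
omitted:

    package main

    import "github.com/offchainlabs/nitro/das"

    func main() {
        // Keep the deprecated badger store enabled so its contents can be
        // read, enable the new file storage it will be copied into, and opt
        // in to the one-time copy.
        cfg := das.DataAvailabilityConfig{
            LocalDBStorage: das.LocalDBStorageConfig{
                Enable:  true,
                DataDir: "/data/das/badger-db", // placeholder path
            },
            LocalFileStorage: das.LocalFileStorageConfig{
                Enable:  true,
                DataDir: "/data/das/file-storage", // placeholder path
            },
            MigrateLocalDBToFileStorage: true,
        }
        _ = cfg // hand this to the normal daserver startup path
    }

After a successful copy, a MIGRATED marker file is written into the badger
data-dir so the copy is not repeated on subsequent restarts.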
ERROR and WARN messages on startup will inform operators that they should migrate (but can continue anyway), if migration failed, or if migration had previously succeeded and config/storage cleanup is recommended. These messages will be made fatal in the future before the local-db-storage backend code is removed entirely for the full deprecation. --- das/das.go | 3 ++ das/db_storage_service.go | 96 ++++++++++++++++++++++++++++++++++++++- das/factory.go | 36 ++++++++++----- 3 files changed, 122 insertions(+), 13 deletions(-) diff --git a/das/das.go b/das/das.go index fea1e6c6a2..658a5178c2 100644 --- a/das/das.go +++ b/das/das.go @@ -45,6 +45,8 @@ type DataAvailabilityConfig struct { LocalFileStorage LocalFileStorageConfig `koanf:"local-file-storage"` S3Storage S3StorageServiceConfig `koanf:"s3-storage"` + MigrateLocalDBToFileStorage bool `koanf:"migrate-local-db-to-file-storage"` + Key KeyConfig `koanf:"key"` RPCAggregator AggregatorConfig `koanf:"rpc-aggregator"` @@ -112,6 +114,7 @@ func dataAvailabilityConfigAddOptions(prefix string, f *flag.FlagSet, r role) { LocalDBStorageConfigAddOptions(prefix+".local-db-storage", f) LocalFileStorageConfigAddOptions(prefix+".local-file-storage", f) S3ConfigAddOptions(prefix+".s3-storage", f) + f.Bool(prefix+".migrate-local-db-to-file-storage", DefaultDataAvailabilityConfig.MigrateLocalDBToFileStorage, "daserver will migrate all data on startup from local-db-storage to local-file-storage, then mark local-db-storage as unusable") // Key config for storage KeyConfigAddOptions(prefix+".key", f) diff --git a/das/db_storage_service.go b/das/db_storage_service.go index 0fbe1c2723..e3b6183c37 100644 --- a/das/db_storage_service.go +++ b/das/db_storage_service.go @@ -7,6 +7,9 @@ import ( "bytes" "context" "errors" + "fmt" + "os" + "path/filepath" "time" badger "github.com/dgraph-io/badger/v4" @@ -35,6 +38,8 @@ type LocalDBStorageConfig struct { var badgerDefaultOptions = badger.DefaultOptions("") +const migratedMarker = "MIGRATED" + var DefaultLocalDBStorageConfig = LocalDBStorageConfig{ Enable: false, DataDir: "", @@ -49,7 +54,7 @@ var DefaultLocalDBStorageConfig = LocalDBStorageConfig{ } func LocalDBStorageConfigAddOptions(prefix string, f *flag.FlagSet) { - f.Bool(prefix+".enable", DefaultLocalDBStorageConfig.Enable, "enable storage/retrieval of sequencer batch data from a database on the local filesystem") + f.Bool(prefix+".enable", DefaultLocalDBStorageConfig.Enable, "!!!DEPRECATED, USE local-file-storage!!! enable storage/retrieval of sequencer batch data from a database on the local filesystem") f.String(prefix+".data-dir", DefaultLocalDBStorageConfig.DataDir, "directory in which to store the database") f.Bool(prefix+".discard-after-timeout", DefaultLocalDBStorageConfig.DiscardAfterTimeout, "discard data after its expiry timeout") @@ -69,7 +74,17 @@ type DBStorageService struct { stopWaiter stopwaiter.StopWaiterSafe } -func NewDBStorageService(ctx context.Context, config *LocalDBStorageConfig) (StorageService, error) { +// The DBStorageService is deprecated. This function will migrate data to the target +// LocalFileStorageService if it is provided and migration hasn't already happened. +func NewDBStorageService(ctx context.Context, config *LocalDBStorageConfig, target *LocalFileStorageService) (*DBStorageService, error) { + if alreadyMigrated(config.DataDir) { + log.Warn("local-db-storage already migrated, please remove it from the daserver configuration and restart. 
data-dir can be cleaned up manually now") + return nil, nil + } + if target == nil { + log.Error("local-db-storage is DEPRECATED, please use use the local-file-storage and migrate-local-db-to-file-storage options. This error will be made fatal in future, continuing for now...") + } + options := badger.DefaultOptions(config.DataDir). WithNumMemtables(config.NumMemtables). WithNumLevelZeroTables(config.NumLevelZeroTables). @@ -87,9 +102,21 @@ func NewDBStorageService(ctx context.Context, config *LocalDBStorageConfig) (Sto discardAfterTimeout: config.DiscardAfterTimeout, dirPath: config.DataDir, } + + if target != nil { + if err = ret.migrateTo(ctx, target); err != nil { + return nil, fmt.Errorf("error migrating local-db-storage to %s: %w", target, err) + } + if err = ret.setMigrated(); err != nil { + return nil, fmt.Errorf("error finalizing migration of local-db-storage to %s: %w", target, err) + } + return nil, nil + } + if err := ret.stopWaiter.Start(ctx, ret); err != nil { return nil, err } + err = ret.stopWaiter.LaunchThreadSafe(func(myCtx context.Context) { ticker := time.NewTicker(5 * time.Minute) defer ticker.Stop() @@ -152,6 +179,48 @@ func (dbs *DBStorageService) Put(ctx context.Context, data []byte, timeout uint6 }) } +func (dbs *DBStorageService) migrateTo(ctx context.Context, s StorageService) error { + originExpirationPolicy, err := dbs.ExpirationPolicy(ctx) + if err != nil { + return err + } + targetExpirationPolicy, err := s.ExpirationPolicy(ctx) + if err != nil { + return err + } + + if originExpirationPolicy == daprovider.KeepForever && targetExpirationPolicy == daprovider.DiscardAfterDataTimeout { + return errors.New("can't migrate from DBStorageService to target, incompatible expiration policies - can't migrate from non-expiring to expiring since non-expiring DB lacks expiry time metadata") + } + + return dbs.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + it := txn.NewIterator(opts) + defer it.Close() + log.Info("Migrating from DBStorageService", "target", s) + migrationStart := time.Now() + count := 0 + for it.Rewind(); it.Valid(); it.Next() { + if count%1000 == 0 { + log.Info("Migration in progress", "migrated", count) + } + item := it.Item() + k := item.Key() + expiry := item.ExpiresAt() + err := item.Value(func(v []byte) error { + log.Trace("migrated", "key", pretty.FirstFewBytes(k), "value", pretty.FirstFewBytes(v), "expiry", expiry) + return s.Put(ctx, v, expiry) + }) + if err != nil { + return err + } + count++ + } + log.Info("Migration from DBStorageService complete", "target", s, "migrated", count, "duration", time.Since(migrationStart)) + return nil + }) +} + func (dbs *DBStorageService) Sync(ctx context.Context) error { return dbs.db.Sync() } @@ -160,6 +229,29 @@ func (dbs *DBStorageService) Close(ctx context.Context) error { return dbs.stopWaiter.StopAndWait() } +func alreadyMigrated(dirPath string) bool { + migratedMarkerFile := filepath.Join(dirPath, migratedMarker) + _, err := os.Stat(migratedMarkerFile) + if os.IsNotExist(err) { + return false + } + if err != nil { + log.Error("error checking if local-db-storage is already migrated", "err", err) + return false + } + return true +} + +func (dbs *DBStorageService) setMigrated() error { + migratedMarkerFile := filepath.Join(dbs.dirPath, migratedMarker) + file, err := os.OpenFile(migratedMarkerFile, os.O_CREATE|os.O_WRONLY, 0o600) + if err != nil { + return err + } + file.Close() + return nil +} + func (dbs *DBStorageService) ExpirationPolicy(ctx context.Context) 
(daprovider.ExpirationPolicy, error) { if dbs.discardAfterTimeout { return daprovider.DiscardAfterDataTimeout, nil diff --git a/das/factory.go b/das/factory.go index 8f6432234d..d640cee9fb 100644 --- a/das/factory.go +++ b/das/factory.go @@ -25,26 +25,36 @@ func CreatePersistentStorageService( ) (StorageService, *LifecycleManager, error) { storageServices := make([]StorageService, 0, 10) var lifecycleManager LifecycleManager - if config.LocalDBStorage.Enable { - s, err := NewDBStorageService(ctx, &config.LocalDBStorage) + var err error + + var fs *LocalFileStorageService + if config.LocalFileStorage.Enable { + fs, err = NewLocalFileStorageService(config.LocalFileStorage) if err != nil { return nil, nil, err } - lifecycleManager.Register(s) - storageServices = append(storageServices, s) - } - - if config.LocalFileStorage.Enable { - s, err := NewLocalFileStorageService(config.LocalFileStorage) + err = fs.start(ctx) if err != nil { return nil, nil, err } - err = s.start(ctx) + lifecycleManager.Register(fs) + storageServices = append(storageServices, fs) + } + + if config.LocalDBStorage.Enable { + var s *DBStorageService + if config.MigrateLocalDBToFileStorage { + s, err = NewDBStorageService(ctx, &config.LocalDBStorage, fs) + } else { + s, err = NewDBStorageService(ctx, &config.LocalDBStorage, nil) + } if err != nil { return nil, nil, err } - lifecycleManager.Register(s) - storageServices = append(storageServices, s) + if s != nil { + lifecycleManager.Register(s) + storageServices = append(storageServices, s) + } } if config.S3Storage.Enable { @@ -67,6 +77,10 @@ func CreatePersistentStorageService( if len(storageServices) == 1 { return storageServices[0], &lifecycleManager, nil } + if len(storageServices) == 0 { + return nil, nil, errors.New("No data-availability storage backend has been configured") + } + return nil, &lifecycleManager, nil } From 1f67c39d19b85d5fd9610178766326b5ba244747 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Tue, 18 Jun 2024 11:49:58 -0700 Subject: [PATCH 18/72] Add missed error return --- das/local_file_storage_service.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 0e2a381cd5..6b0a5f0070 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -437,14 +437,14 @@ func (tl *trieLayout) prune(pruneTil time.Time) error { if stat.Nlink == 1 { err = recursivelyDeleteUntil(pathByHash, byDataHash) if err != nil { - + return err } } pruned++ } if pruned > 0 { - log.Info("local file store pruned expired batches", "count", pruned, "pruneTil", pruneTil, "duration", time.Since(pruningStart)) + log.Info("Local file store pruned expired batches", "count", pruned, "pruneTil", pruneTil, "duration", time.Since(pruningStart)) } return nil } From f551f1f7c9360f85ab1e958be567c125e58fd7c6 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Tue, 18 Jun 2024 17:22:13 -0500 Subject: [PATCH 19/72] Don't post a batch that would cause a reorg due to being near the layer 1 minimum block or timestamp bounds --- arbnode/batch_poster.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index 60693689fe..5c35c25e2d 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -162,6 +162,7 @@ type BatchPosterConfig struct { UseAccessLists bool `koanf:"use-access-lists" reload:"hot"` GasEstimateBaseFeeMultipleBips arbmath.Bips 
`koanf:"gas-estimate-base-fee-multiple-bips"` Dangerous BatchPosterDangerousConfig `koanf:"dangerous"` + ReorgResistanceMargin time.Duration `koanf:"reorg-resistance-margin" reload:"hot"` gasRefunder common.Address l1BlockBound l1BlockBound @@ -213,6 +214,7 @@ func BatchPosterConfigAddOptions(prefix string, f *pflag.FlagSet) { f.Duration(prefix+".l1-block-bound-bypass", DefaultBatchPosterConfig.L1BlockBoundBypass, "post batches even if not within the layer 1 future bounds if we're within this margin of the max delay") f.Bool(prefix+".use-access-lists", DefaultBatchPosterConfig.UseAccessLists, "post batches with access lists to reduce gas usage (disabled for L3s)") f.Uint64(prefix+".gas-estimate-base-fee-multiple-bips", uint64(DefaultBatchPosterConfig.GasEstimateBaseFeeMultipleBips), "for gas estimation, use this multiple of the basefee (measured in basis points) as the max fee per gas") + f.Duration(prefix+".reorg-resistance-margin", DefaultBatchPosterConfig.ReorgResistanceMargin, "do not post batch if its over the layer 1 minimum bounds but within this duration from them") redislock.AddConfigOptions(prefix+".redis-lock", f) dataposter.DataPosterConfigAddOptions(prefix+".data-poster", f, dataposter.DefaultDataPosterConfig) genericconf.WalletConfigAddOptions(prefix+".parent-chain-wallet", f, DefaultBatchPosterConfig.ParentChainWallet.Pathname) @@ -242,6 +244,7 @@ var DefaultBatchPosterConfig = BatchPosterConfig{ UseAccessLists: true, RedisLock: redislock.DefaultCfg, GasEstimateBaseFeeMultipleBips: arbmath.OneInBips * 3 / 2, + ReorgResistanceMargin: 10 * time.Minute, } var DefaultBatchPosterL1WalletConfig = genericconf.WalletConfig{ @@ -1238,7 +1241,24 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) b.building.msgCount++ } - if !forcePostBatch || !b.building.haveUsefulMessage { + var disablePosting bool + firstMsgTimeStamp := firstMsg.Message.Header.Timestamp + firstMsgBlockNumber := firstMsg.Message.Header.BlockNumber + batchNearL1BoundMinTimestamp := firstMsgTimeStamp >= l1BoundMinTimestamp && firstMsgTimeStamp <= l1BoundMinTimestamp+uint64(config.ReorgResistanceMargin/time.Second) + batchNearL1BoundMinBlockNumber := firstMsgBlockNumber >= l1BoundMinBlockNumber && firstMsgBlockNumber <= l1BoundMinBlockNumber+uint64(config.ReorgResistanceMargin/ethPosBlockTime) + if config.ReorgResistanceMargin > 0 && (batchNearL1BoundMinTimestamp || batchNearL1BoundMinBlockNumber) { + log.Error( + "Disabling batch posting due to batch being within reorg resistance margin from layer 1 minimum block or timestamp bounds", + "reorgResistanceMargin", config.ReorgResistanceMargin, + "firstMsgTimeStamp", firstMsgTimeStamp, + "l1BoundMinTimestamp", l1BoundMinTimestamp, + "firstMsgBlockNumber", firstMsgBlockNumber, + "l1BoundMinBlockNumber", l1BoundMinBlockNumber, + ) + disablePosting = true + } + + if disablePosting || !forcePostBatch || !b.building.haveUsefulMessage { // the batch isn't full yet and we've posted a batch recently // don't post anything for now return false, nil From d13e42ba1f3e4a3e9fe29737272ccb3564e8a345 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Thu, 20 Jun 2024 10:26:25 -0500 Subject: [PATCH 20/72] address PR comments --- arbnode/batch_poster.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index 5c35c25e2d..779958865d 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -1241,11 +1241,10 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx 
context.Context) (bool, error) b.building.msgCount++ } - var disablePosting bool firstMsgTimeStamp := firstMsg.Message.Header.Timestamp firstMsgBlockNumber := firstMsg.Message.Header.BlockNumber - batchNearL1BoundMinTimestamp := firstMsgTimeStamp >= l1BoundMinTimestamp && firstMsgTimeStamp <= l1BoundMinTimestamp+uint64(config.ReorgResistanceMargin/time.Second) - batchNearL1BoundMinBlockNumber := firstMsgBlockNumber >= l1BoundMinBlockNumber && firstMsgBlockNumber <= l1BoundMinBlockNumber+uint64(config.ReorgResistanceMargin/ethPosBlockTime) + batchNearL1BoundMinTimestamp := firstMsgTimeStamp <= l1BoundMinTimestamp+uint64(config.ReorgResistanceMargin/time.Second) + batchNearL1BoundMinBlockNumber := firstMsgBlockNumber <= l1BoundMinBlockNumber+uint64(config.ReorgResistanceMargin/ethPosBlockTime) if config.ReorgResistanceMargin > 0 && (batchNearL1BoundMinTimestamp || batchNearL1BoundMinBlockNumber) { log.Error( "Disabling batch posting due to batch being within reorg resistance margin from layer 1 minimum block or timestamp bounds", @@ -1255,10 +1254,10 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) "firstMsgBlockNumber", firstMsgBlockNumber, "l1BoundMinBlockNumber", l1BoundMinBlockNumber, ) - disablePosting = true + return false, errors.New("batch is within reorg resistance margin from layer 1 minimum block or timestamp bounds") } - if disablePosting || !forcePostBatch || !b.building.haveUsefulMessage { + if !forcePostBatch || !b.building.haveUsefulMessage { // the batch isn't full yet and we've posted a batch recently // don't post anything for now return false, nil From ae1dbcc22e9e57c772889a6eae12a864b46ce782 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Thu, 20 Jun 2024 10:29:37 -0500 Subject: [PATCH 21/72] fix flag description --- arbnode/batch_poster.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index 779958865d..2d788ac686 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -214,7 +214,7 @@ func BatchPosterConfigAddOptions(prefix string, f *pflag.FlagSet) { f.Duration(prefix+".l1-block-bound-bypass", DefaultBatchPosterConfig.L1BlockBoundBypass, "post batches even if not within the layer 1 future bounds if we're within this margin of the max delay") f.Bool(prefix+".use-access-lists", DefaultBatchPosterConfig.UseAccessLists, "post batches with access lists to reduce gas usage (disabled for L3s)") f.Uint64(prefix+".gas-estimate-base-fee-multiple-bips", uint64(DefaultBatchPosterConfig.GasEstimateBaseFeeMultipleBips), "for gas estimation, use this multiple of the basefee (measured in basis points) as the max fee per gas") - f.Duration(prefix+".reorg-resistance-margin", DefaultBatchPosterConfig.ReorgResistanceMargin, "do not post batch if its over the layer 1 minimum bounds but within this duration from them") + f.Duration(prefix+".reorg-resistance-margin", DefaultBatchPosterConfig.ReorgResistanceMargin, "do not post batch if its within this duration from layer 1 minimum bounds ") redislock.AddConfigOptions(prefix+".redis-lock", f) dataposter.DataPosterConfigAddOptions(prefix+".data-poster", f, dataposter.DefaultDataPosterConfig) genericconf.WalletConfigAddOptions(prefix+".parent-chain-wallet", f, DefaultBatchPosterConfig.ParentChainWallet.Pathname) From 5ed59b6ddd6350f7f3a9ff5d8d1b5306abab0666 Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 24 Jun 2024 09:07:33 -0500 Subject: [PATCH 22/72] feedback --- validator/server_arb/execution_run_test.go | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/validator/server_arb/execution_run_test.go b/validator/server_arb/execution_run_test.go index 0f047db39e..f5620b7aa1 100644 --- a/validator/server_arb/execution_run_test.go +++ b/validator/server_arb/execution_run_test.go @@ -74,12 +74,12 @@ func Test_machineHashesWithStep(t *testing.T) { numRequiredHashes := uint64(0) _, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) if !strings.Contains(err.Error(), "step size cannot be 0") { - t.Fatal("Wrong error") + t.Error("Wrong error") } stepSize = uint64(1) _, err = e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) if !strings.Contains(err.Error(), "required number of hashes cannot be 0") { - t.Fatal("Wrong error") + t.Error("Wrong error") } }) t.Run("machine at start index 0 hash is the finished state hash", func(t *testing.T) { @@ -104,10 +104,10 @@ func Test_machineHashesWithStep(t *testing.T) { } expected := machineFinishedHash(mm.gs) if len(hashes) != 1 { - t.Fatal("Wanted one hash") + t.Error("Wanted one hash") } if expected != hashes[0] { - t.Fatalf("Wanted %#x, got %#x", expected, hashes[0]) + t.Errorf("Wanted %#x, got %#x", expected, hashes[0]) } }) t.Run("can step in step size increments and collect hashes", func(t *testing.T) { @@ -150,7 +150,7 @@ func Test_machineHashesWithStep(t *testing.T) { } for i := range hashes { if expectedHashes[i] != hashes[i] { - t.Fatalf("Wanted at index %d, %#x, got %#x", i, expectedHashes[i], hashes[i]) + t.Errorf("Wanted at index %d, %#x, got %#x", i, expectedHashes[i], hashes[i]) } } }) @@ -201,7 +201,7 @@ func Test_machineHashesWithStep(t *testing.T) { } for i := range hashes { if expectedHashes[i] != hashes[i] { - t.Fatalf("Wanted at index %d, %#x, got %#x", i, expectedHashes[i], hashes[i]) + t.Errorf("Wanted at index %d, %#x, got %#x", i, expectedHashes[i], hashes[i]) } } }) From f7ed4f0cdd06c31daadbfc40c08aa01868dfaa21 Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 24 Jun 2024 09:26:53 -0500 Subject: [PATCH 23/72] feedback --- validator/server_arb/execution_run.go | 39 +++++++++++++-------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/validator/server_arb/execution_run.go b/validator/server_arb/execution_run.go index 50c9b5608e..89aed3fcf4 100644 --- a/validator/server_arb/execution_run.go +++ b/validator/server_arb/execution_run.go @@ -83,9 +83,9 @@ func (e *executionRun) GetStepAt(position uint64) containers.PromiseInterface[*v }) } -func (e *executionRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, requiredNumHashes uint64) containers.PromiseInterface[[]common.Hash] { +func (e *executionRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, maxIterations uint64) containers.PromiseInterface[[]common.Hash] { return stopwaiter.LaunchPromiseThread(e, func(ctx context.Context) ([]common.Hash, error) { - return e.machineHashesWithStepSize(ctx, machineStartIndex, stepSize, requiredNumHashes) + return e.machineHashesWithStepSize(ctx, machineStartIndex, stepSize, maxIterations) }) } @@ -93,13 +93,13 @@ func (e *executionRun) machineHashesWithStepSize( ctx context.Context, machineStartIndex, stepSize, - requiredNumHashes uint64, + maxIterations uint64, ) ([]common.Hash, error) { if stepSize == 0 { return nil, fmt.Errorf("step size cannot be 0") } - if requiredNumHashes == 0 { - return nil, fmt.Errorf("required number of hashes cannot be 0") + if maxIterations == 0 { + return nil, fmt.Errorf("max number of iterations 
cannot be 0") } machine, err := e.cache.GetMachineAt(ctx, machineStartIndex) if err != nil { @@ -107,8 +107,7 @@ func (e *executionRun) machineHashesWithStepSize( } log.Debug(fmt.Sprintf("Advanced machine to index %d, beginning hash computation", machineStartIndex)) - // If the machine is starting at index 0, we always want to start at the "Machine finished" global state status - // to align with the machine hashes that the inbox machine will produce. + // A var machineHashes []common.Hash if machineStartIndex == 0 { gs := machine.GetGlobalState() @@ -121,43 +120,43 @@ func (e *executionRun) machineHashesWithStepSize( startHash := machineHashes[0] // If we only want 1 hash, we can return early. - if requiredNumHashes == 1 { + if maxIterations == 1 { return machineHashes, nil } - logInterval := requiredNumHashes / 20 // Log every 5% progress + logInterval := maxIterations / 20 // Log every 5% progress if logInterval == 0 { logInterval = 1 } start := time.Now() - for numIterations := uint64(0); numIterations < requiredNumHashes; numIterations++ { + for i := uint64(0); i < maxIterations; i++ { // The absolute program counter the machine should be in after stepping. - absoluteMachineIndex := machineStartIndex + stepSize*(numIterations+1) + absoluteMachineIndex := machineStartIndex + stepSize*(i+1) // Advance the machine in step size increments. if err := machine.Step(ctx, stepSize); err != nil { return nil, fmt.Errorf("failed to step machine to position %d: %w", absoluteMachineIndex, err) } - if numIterations%logInterval == 0 || numIterations == requiredNumHashes-1 { - progressPercent := (float64(numIterations+1) / float64(requiredNumHashes)) * 100 + if i%logInterval == 0 || i == maxIterations-1 { + progressPercent := (float64(i+1) / float64(maxIterations)) * 100 log.Info( fmt.Sprintf( - "Computing BOLD subchallenge progress: %.2f%% - %d of %d hashes needed", + "Computing BOLD subchallenge progress: %.2f%% - %d of %d hashes", progressPercent, - numIterations+1, - requiredNumHashes, + i+1, + maxIterations, ), - "machinePosition", numIterations*stepSize+machineStartIndex, + "machinePosition", i*stepSize+machineStartIndex, "timeSinceStart", time.Since(start), "stepSize", stepSize, "startHash", startHash, "machineStartIndex", machineStartIndex, - "numDesiredLeaves", requiredNumHashes, + "maxIterations", maxIterations, ) } machineHashes = append(machineHashes, machine.Hash()) - if uint64(len(machineHashes)) == requiredNumHashes { + if uint64(len(machineHashes)) == maxIterations { break } } @@ -166,7 +165,7 @@ func (e *executionRun) machineHashesWithStepSize( "stepSize", stepSize, "startHash", startHash, "machineStartIndex", machineStartIndex, - "numDesiredLeaves", requiredNumHashes, + "maxIterations", maxIterations, "finishedHash", machineHashes[len(machineHashes)-1], "finishedGlobalState", fmt.Sprintf("%+v", machine.GetGlobalState()), ) From 85d0e8de64e54d3004cd1aa9f76f73c6f435761d Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 24 Jun 2024 09:58:02 -0500 Subject: [PATCH 24/72] commentary --- validator/server_arb/execution_run.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/validator/server_arb/execution_run.go b/validator/server_arb/execution_run.go index 89aed3fcf4..8bdce145a2 100644 --- a/validator/server_arb/execution_run.go +++ b/validator/server_arb/execution_run.go @@ -107,7 +107,10 @@ func (e *executionRun) machineHashesWithStepSize( } log.Debug(fmt.Sprintf("Advanced machine to index %d, beginning hash computation", machineStartIndex)) - // A + // In 
BOLD, the hash of a machine at index 0 is a special hash that is computed as the + // `machineFinishedHash(gs)` where `gs` is the global state of the machine at index 0. + // This is so that the hash aligns with the start state of the claimed challenge edge + // at the level above, as required by the BOLD protocol. var machineHashes []common.Hash if machineStartIndex == 0 { gs := machine.GetGlobalState() @@ -157,6 +160,11 @@ func (e *executionRun) machineHashesWithStepSize( } machineHashes = append(machineHashes, machine.Hash()) if uint64(len(machineHashes)) == maxIterations { + log.Info("Reached the max number of iterations for the hashes needed to open a subchallenge") + break + } + if !machine.IsRunning() { + log.Info("Machine no longer running, exiting early from hash computation loop") break } } @@ -165,6 +173,7 @@ func (e *executionRun) machineHashesWithStepSize( "stepSize", stepSize, "startHash", startHash, "machineStartIndex", machineStartIndex, + "numberOfHashesComputed", len(machineHashes), "maxIterations", maxIterations, "finishedHash", machineHashes[len(machineHashes)-1], "finishedGlobalState", fmt.Sprintf("%+v", machine.GetGlobalState()), From bf60a37dde49cb2fbe3d0493c3fe72ac1f7848d4 Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 24 Jun 2024 10:14:50 -0500 Subject: [PATCH 25/72] rename --- validator/server_arb/execution_run_test.go | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/validator/server_arb/execution_run_test.go b/validator/server_arb/execution_run_test.go index f5620b7aa1..3188c84a32 100644 --- a/validator/server_arb/execution_run_test.go +++ b/validator/server_arb/execution_run_test.go @@ -154,7 +154,7 @@ func Test_machineHashesWithStep(t *testing.T) { } } }) - t.Run("if finishes execution early, simply pads the remaining desired hashes with the machine finished hash", func(t *testing.T) { + t.Run("if finishes execution early, can return a smaller number of hashes than the expected max iterations", func(t *testing.T) { initialGs := validator.GoGlobalState{ Batch: 1, PosInBatch: 0, @@ -189,15 +189,8 @@ func Test_machineHashesWithStep(t *testing.T) { } expectedHashes = append(expectedHashes, gs.Hash()) } - // The rest of the expected hashes should be the machine finished hash repeated. 
- for len(expectedHashes) < 10 { - expectedHashes = append(expectedHashes, machineFinishedHash(validator.GoGlobalState{ - Batch: 1, - PosInBatch: mm.totalSteps - 1, - })) - } - if len(hashes) != len(expectedHashes) { - t.Fatal("Wanted one hash") + if len(hashes) >= int(numRequiredHashes) { + t.Fatal("Wanted fewer hashes than the max iterations") } for i := range hashes { if expectedHashes[i] != hashes[i] { From 27e4a826816db2ae59f3eb1f1e0cc84034504d68 Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 24 Jun 2024 10:46:40 -0500 Subject: [PATCH 26/72] commentary --- validator/server_arb/execution_run_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/validator/server_arb/execution_run_test.go b/validator/server_arb/execution_run_test.go index 3188c84a32..b5332e108e 100644 --- a/validator/server_arb/execution_run_test.go +++ b/validator/server_arb/execution_run_test.go @@ -78,7 +78,7 @@ func Test_machineHashesWithStep(t *testing.T) { } stepSize = uint64(1) _, err = e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) - if !strings.Contains(err.Error(), "required number of hashes cannot be 0") { + if !strings.Contains(err.Error(), "number of iterations cannot be 0") { t.Error("Wrong error") } }) @@ -189,6 +189,10 @@ func Test_machineHashesWithStep(t *testing.T) { } expectedHashes = append(expectedHashes, gs.Hash()) } + expectedHashes = append(expectedHashes, machineFinishedHash(validator.GoGlobalState{ + Batch: 1, + PosInBatch: mm.totalSteps - 1, + })) if len(hashes) >= int(numRequiredHashes) { t.Fatal("Wanted fewer hashes than the max iterations") } From 1eff6fb335d8122383e21ae36c4ba36d2b5a11cb Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 24 Jun 2024 10:47:29 -0500 Subject: [PATCH 27/72] replace --- system_tests/validation_mock_test.go | 2 +- validator/client/validation_client.go | 4 ++-- validator/interface.go | 2 +- validator/server_arb/execution_run_test.go | 20 ++++++++++---------- validator/valnode/validation_api.go | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/system_tests/validation_mock_test.go b/system_tests/validation_mock_test.go index d80a2041ec..e61fd407ec 100644 --- a/system_tests/validation_mock_test.go +++ b/system_tests/validation_mock_test.go @@ -126,7 +126,7 @@ func (r *mockExecRun) GetStepAt(position uint64) containers.PromiseInterface[*va }, nil) } -func (r *mockExecRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, numRequiredHashes uint64) containers.PromiseInterface[[]common.Hash] { +func (r *mockExecRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, maxIterations uint64) containers.PromiseInterface[[]common.Hash] { return containers.NewReadyPromise[[]common.Hash](nil, nil) } diff --git a/validator/client/validation_client.go b/validator/client/validation_client.go index 0f40ef0387..949260002d 100644 --- a/validator/client/validation_client.go +++ b/validator/client/validation_client.go @@ -197,10 +197,10 @@ func (r *ExecutionClientRun) GetStepAt(pos uint64) containers.PromiseInterface[* }) } -func (r *ExecutionClientRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, numRequiredHashes uint64) containers.PromiseInterface[[]common.Hash] { +func (r *ExecutionClientRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, maxIterations uint64) containers.PromiseInterface[[]common.Hash] { return stopwaiter.LaunchPromiseThread[[]common.Hash](r, func(ctx context.Context) ([]common.Hash, error) { var resJson []common.Hash - err := 
r.client.client.CallContext(ctx, &resJson, server_api.Namespace+"_getMachineHashesWithStepSize", r.id, machineStartIndex, stepSize, numRequiredHashes) + err := r.client.client.CallContext(ctx, &resJson, server_api.Namespace+"_getMachineHashesWithStepSize", r.id, machineStartIndex, stepSize, maxIterations) if err != nil { return nil, err } diff --git a/validator/interface.go b/validator/interface.go index 58ad841ff5..91668a3771 100644 --- a/validator/interface.go +++ b/validator/interface.go @@ -30,7 +30,7 @@ type ExecutionSpawner interface { type ExecutionRun interface { GetStepAt(uint64) containers.PromiseInterface[*MachineStepResult] - GetMachineHashesWithStepSize(machineStartIndex, stepSize, numRequiredHashes uint64) containers.PromiseInterface[[]common.Hash] + GetMachineHashesWithStepSize(machineStartIndex, stepSize, maxIterations uint64) containers.PromiseInterface[[]common.Hash] GetLastStep() containers.PromiseInterface[*MachineStepResult] GetProofAt(uint64) containers.PromiseInterface[[]byte] PrepareRange(uint64, uint64) containers.PromiseInterface[struct{}] diff --git a/validator/server_arb/execution_run_test.go b/validator/server_arb/execution_run_test.go index b5332e108e..bb148b5d98 100644 --- a/validator/server_arb/execution_run_test.go +++ b/validator/server_arb/execution_run_test.go @@ -71,13 +71,13 @@ func Test_machineHashesWithStep(t *testing.T) { t.Run("basic argument checks", func(t *testing.T) { machStartIndex := uint64(0) stepSize := uint64(0) - numRequiredHashes := uint64(0) - _, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) + maxIterations := uint64(0) + _, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) if !strings.Contains(err.Error(), "step size cannot be 0") { t.Error("Wrong error") } stepSize = uint64(1) - _, err = e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) + _, err = e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) if !strings.Contains(err.Error(), "number of iterations cannot be 0") { t.Error("Wrong error") } @@ -88,7 +88,7 @@ func Test_machineHashesWithStep(t *testing.T) { } machStartIndex := uint64(0) stepSize := uint64(1) - numRequiredHashes := uint64(1) + maxIterations := uint64(1) e.cache = &MachineCache{ buildingLock: make(chan struct{}, 1), machines: []MachineInterface{mm}, @@ -98,7 +98,7 @@ func Test_machineHashesWithStep(t *testing.T) { <-time.After(time.Millisecond * 50) e.cache.buildingLock <- struct{}{} }() - hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) + hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) if err != nil { t.Fatal(err) } @@ -119,7 +119,7 @@ func Test_machineHashesWithStep(t *testing.T) { mm.totalSteps = 20 machStartIndex := uint64(0) stepSize := uint64(5) - numRequiredHashes := uint64(4) + maxIterations := uint64(4) e.cache = &MachineCache{ buildingLock: make(chan struct{}, 1), machines: []MachineInterface{mm}, @@ -129,7 +129,7 @@ func Test_machineHashesWithStep(t *testing.T) { <-time.After(time.Millisecond * 50) e.cache.buildingLock <- struct{}{} }() - hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) + hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) if err != nil { t.Fatal(err) } @@ -163,7 +163,7 @@ func Test_machineHashesWithStep(t *testing.T) { mm.totalSteps = 20 machStartIndex := uint64(0) stepSize := uint64(5) - numRequiredHashes := 
uint64(10) + maxIterations := uint64(10) e.cache = &MachineCache{ buildingLock: make(chan struct{}, 1), machines: []MachineInterface{mm}, @@ -173,7 +173,7 @@ func Test_machineHashesWithStep(t *testing.T) { <-time.After(time.Millisecond * 50) e.cache.buildingLock <- struct{}{} }() - hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, numRequiredHashes) + hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) if err != nil { t.Fatal(err) } @@ -193,7 +193,7 @@ func Test_machineHashesWithStep(t *testing.T) { Batch: 1, PosInBatch: mm.totalSteps - 1, })) - if len(hashes) >= int(numRequiredHashes) { + if len(hashes) >= int(maxIterations) { t.Fatal("Wanted fewer hashes than the max iterations") } for i := range hashes { diff --git a/validator/valnode/validation_api.go b/validator/valnode/validation_api.go index a2ca8cfc70..d9711f9cb8 100644 --- a/validator/valnode/validation_api.go +++ b/validator/valnode/validation_api.go @@ -148,12 +148,12 @@ func (a *ExecServerAPI) GetStepAt(ctx context.Context, execid uint64, position u return server_api.MachineStepResultToJson(res), nil } -func (a *ExecServerAPI) GetMachineHashesWithStepSize(ctx context.Context, execid, fromStep, stepSize, numRequiredHashes uint64) ([]common.Hash, error) { +func (a *ExecServerAPI) GetMachineHashesWithStepSize(ctx context.Context, execid, fromStep, stepSize, maxIterations uint64) ([]common.Hash, error) { run, err := a.getRun(execid) if err != nil { return nil, err } - leavesInRange := run.GetMachineHashesWithStepSize(fromStep, stepSize, numRequiredHashes) + leavesInRange := run.GetMachineHashesWithStepSize(fromStep, stepSize, maxIterations) res, err := leavesInRange.Await(ctx) if err != nil { return nil, err From e853efffd328cbe07d25113506d6cd112b1c0688 Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 24 Jun 2024 11:00:20 -0500 Subject: [PATCH 28/72] rename --- validator/valnode/validation_api.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validator/valnode/validation_api.go b/validator/valnode/validation_api.go index d9711f9cb8..3299366821 100644 --- a/validator/valnode/validation_api.go +++ b/validator/valnode/validation_api.go @@ -153,8 +153,8 @@ func (a *ExecServerAPI) GetMachineHashesWithStepSize(ctx context.Context, execid if err != nil { return nil, err } - leavesInRange := run.GetMachineHashesWithStepSize(fromStep, stepSize, maxIterations) - res, err := leavesInRange.Await(ctx) + hashesInRange := run.GetMachineHashesWithStepSize(fromStep, stepSize, maxIterations) + res, err := hashesInRange.Await(ctx) if err != nil { return nil, err } From 947af3099059ba58a13c66797fcdfa34e8c419b2 Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Fri, 28 Jun 2024 18:45:03 +0800 Subject: [PATCH 29/72] refactor: breakdown query to eth_getLogs --- staker/l1_validator.go | 2 +- staker/rollup_watcher.go | 24 ++++++++++++++++++------ staker/staker.go | 4 ++++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/staker/l1_validator.go b/staker/l1_validator.go index d68365ede0..14c46b5ddb 100644 --- a/staker/l1_validator.go +++ b/staker/l1_validator.go @@ -381,7 +381,7 @@ func (v *L1Validator) generateNodeAction( return nil, false, nil } - successorNodes, err := v.rollup.LookupNodeChildren(ctx, stakerInfo.LatestStakedNode, stakerInfo.LatestStakedNodeHash) + successorNodes, err := v.rollup.LookupNodeChildren(ctx, stakerInfo.LatestStakedNode, stakerConfig.LogQueryRange, stakerInfo.LatestStakedNodeHash) if err != nil { return nil, false, fmt.Errorf("error 
looking up node %v (hash %v) children: %w", stakerInfo.LatestStakedNode, stakerInfo.LatestStakedNodeHash, err) } diff --git a/staker/rollup_watcher.go b/staker/rollup_watcher.go index 118ce15b44..1fbc783b47 100644 --- a/staker/rollup_watcher.go +++ b/staker/rollup_watcher.go @@ -20,6 +20,7 @@ import ( "github.com/ethereum/go-ethereum" "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/core/types" ) var rollupInitializedID common.Hash @@ -165,7 +166,7 @@ func (r *RollupWatcher) LookupNode(ctx context.Context, number uint64) (*NodeInf }, nil } -func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, nodeHash common.Hash) ([]*NodeInfo, error) { +func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, logQueryRange uint64, nodeHash common.Hash) ([]*NodeInfo, error) { node, err := r.RollupUserLogic.GetNode(r.getCallOpts(ctx), nodeNum) if err != nil { return nil, err @@ -180,17 +181,28 @@ func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, Addresses: []common.Address{r.address}, Topics: [][]common.Hash{{nodeCreatedID}, nil, {nodeHash}}, } - query.FromBlock, err = r.getNodeCreationBlock(ctx, nodeNum) + fromBlock, err := r.getNodeCreationBlock(ctx, nodeNum) if err != nil { return nil, err } - query.ToBlock, err = r.getNodeCreationBlock(ctx, node.LatestChildNumber) + toBlock, err := r.getNodeCreationBlock(ctx, node.LatestChildNumber) if err != nil { return nil, err } - logs, err := r.client.FilterLogs(ctx, query) - if err != nil { - return nil, err + var logs []types.Log + // break down the query to avoid eth_getLogs query limit + for toBlock.Cmp(fromBlock) > 0 { + query.FromBlock = fromBlock + query.ToBlock = new(big.Int).Add(fromBlock, big.NewInt(logQueryRange)) + if query.ToBlock.Cmp(toBlock) > 0 { + query.ToBlock = toBlock + } + segment, err := r.client.FilterLogs(ctx, query) + if err != nil { + return nil, err + } + logs = append(logs, segment) + fromBlock = new(big.Int).Add(query.ToBlock, big.NewInt(1)) } infos := make([]*NodeInfo, 0, len(logs)) lastHash := nodeHash diff --git a/staker/staker.go b/staker/staker.go index da6413e122..f7f412ca16 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -90,6 +90,7 @@ type L1ValidatorConfig struct { ExtraGas uint64 `koanf:"extra-gas" reload:"hot"` Dangerous DangerousConfig `koanf:"dangerous"` ParentChainWallet genericconf.WalletConfig `koanf:"parent-chain-wallet"` + LogQueryRange uint64 `koanf:"log-query-range" reload:"hot"` strategy StakerStrategy gasRefunder common.Address @@ -156,6 +157,7 @@ var DefaultL1ValidatorConfig = L1ValidatorConfig{ ExtraGas: 50000, Dangerous: DefaultDangerousConfig, ParentChainWallet: DefaultValidatorL1WalletConfig, + LogQueryRange: 999, } var TestL1ValidatorConfig = L1ValidatorConfig{ @@ -176,6 +178,7 @@ var TestL1ValidatorConfig = L1ValidatorConfig{ ExtraGas: 50000, Dangerous: DefaultDangerousConfig, ParentChainWallet: DefaultValidatorL1WalletConfig, + LogQueryRange: 999, } var DefaultValidatorL1WalletConfig = genericconf.WalletConfig{ @@ -201,6 +204,7 @@ func L1ValidatorConfigAddOptions(prefix string, f *flag.FlagSet) { f.String(prefix+".gas-refunder-address", DefaultL1ValidatorConfig.GasRefunderAddress, "The gas refunder contract address (optional)") f.String(prefix+".redis-url", DefaultL1ValidatorConfig.RedisUrl, "redis url for L1 validator") f.Uint64(prefix+".extra-gas", DefaultL1ValidatorConfig.ExtraGas, "use this much more gas than estimation says is necessary to post transactions") + 
f.Uint64(prefix+".log-query-range", DefaultL1ValidatorConfig.LogQueryRange, "range ro query from eth_getLogs") dataposter.DataPosterConfigAddOptions(prefix+".data-poster", f, dataposter.DefaultDataPosterConfigForValidator) DangerousConfigAddOptions(prefix+".dangerous", f) genericconf.WalletConfigAddOptions(prefix+".parent-chain-wallet", f, DefaultL1ValidatorConfig.ParentChainWallet.Pathname) From ea08ba547fc4ad87b6cbef3ebc44712b481a98d0 Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Fri, 28 Jun 2024 18:51:20 +0800 Subject: [PATCH 30/72] fix: array append --- staker/rollup_watcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/staker/rollup_watcher.go b/staker/rollup_watcher.go index 1fbc783b47..6d871be24e 100644 --- a/staker/rollup_watcher.go +++ b/staker/rollup_watcher.go @@ -201,7 +201,7 @@ func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, if err != nil { return nil, err } - logs = append(logs, segment) + logs = append(logs, segment...) fromBlock = new(big.Int).Add(query.ToBlock, big.NewInt(1)) } infos := make([]*NodeInfo, 0, len(logs)) From 40ff63075e231db86a7dde9d3e9b3876bf98495b Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Fri, 28 Jun 2024 18:55:25 +0800 Subject: [PATCH 31/72] fix: log query range type error --- staker/rollup_watcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/staker/rollup_watcher.go b/staker/rollup_watcher.go index 6d871be24e..4ff8db0ce3 100644 --- a/staker/rollup_watcher.go +++ b/staker/rollup_watcher.go @@ -193,7 +193,7 @@ func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, // break down the query to avoid eth_getLogs query limit for toBlock.Cmp(fromBlock) > 0 { query.FromBlock = fromBlock - query.ToBlock = new(big.Int).Add(fromBlock, big.NewInt(logQueryRange)) + query.ToBlock = new(big.Int).Add(fromBlock, big.NewInt(int64(logQueryRange))) if query.ToBlock.Cmp(toBlock) > 0 { query.ToBlock = toBlock } From 31a168d8aecccf81bcfa35af7548f9219f85f3b1 Mon Sep 17 00:00:00 2001 From: Aman Sanghi Date: Tue, 2 Jul 2024 17:29:30 +0530 Subject: [PATCH 32/72] Add test for gas estimation with RPC gas limit --- system_tests/estimation_test.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/system_tests/estimation_test.go b/system_tests/estimation_test.go index e7f00ca94e..0e0b329708 100644 --- a/system_tests/estimation_test.go +++ b/system_tests/estimation_test.go @@ -324,3 +324,33 @@ func TestDisableL1Charging(t *testing.T) { _, err = builder.L2.Client.CallContract(ctx, ethereum.CallMsg{To: &addr, Gas: gasWithoutL1Charging, SkipL1Charging: true}, nil) Require(t, err) } + +func TestGasEstimationWithRPCGasLimit(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + builder := NewNodeBuilder(ctx).DefaultConfig(t, true) + cleanup := builder.Build(t) + defer cleanup() + + execConfigA := builder.execConfig + execConfigA.RPC.RPCGasCap = params.TxGas + testClientA, cleanupA := builder.Build2ndNode(t, &SecondNodeParams{execConfig: execConfigA}) + defer cleanupA() + addr := common.HexToAddress("0x12345678") + estimateGas, err := testClientA.Client.EstimateGas(ctx, ethereum.CallMsg{To: &addr}) + Require(t, err) + if estimateGas <= params.TxGas { + Fatal(t, "Incorrect gas estimate") + } + + _, err = testClientA.Client.CallContract(ctx, ethereum.CallMsg{To: &addr}, nil) + Require(t, err) + + execConfigB := builder.execConfig + execConfigB.RPC.RPCGasCap = params.TxGas - 1 + testClientB, cleanupB := 
builder.Build2ndNode(t, &SecondNodeParams{execConfig: execConfigB}) + defer cleanupB() + _, err = testClientB.Client.EstimateGas(ctx, ethereum.CallMsg{To: &addr}) + Require(t, err) +} From 022d5ee4f2efe13882cf234c9fd4b08d3c622a45 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Tue, 2 Jul 2024 16:23:30 -0500 Subject: [PATCH 33/72] Add LOG opcodes to Stylus tracing --- arbos/programs/api.go | 3 ++ arbos/util/tracing.go | 21 +++++++++++++ system_tests/program_test.go | 61 +++++++++++++++++++++++++++++------- 3 files changed, 74 insertions(+), 11 deletions(-) diff --git a/arbos/programs/api.go b/arbos/programs/api.go index 787f127ea4..65a58a47c2 100644 --- a/arbos/programs/api.go +++ b/arbos/programs/api.go @@ -228,6 +228,9 @@ func newApiClosures( return addr, res, cost, nil } emitLog := func(topics []common.Hash, data []byte) error { + if tracingInfo != nil { + tracingInfo.RecordEmitLog(topics, data) + } if readOnly { return vm.ErrWriteProtection } diff --git a/arbos/util/tracing.go b/arbos/util/tracing.go index 49b82d6d64..ed5acb33b2 100644 --- a/arbos/util/tracing.go +++ b/arbos/util/tracing.go @@ -4,6 +4,7 @@ package util import ( + "fmt" "math/big" "github.com/ethereum/go-ethereum/common" @@ -47,6 +48,26 @@ func NewTracingInfo(evm *vm.EVM, from, to common.Address, scenario TracingScenar } } +func (info *TracingInfo) RecordEmitLog(topics []common.Hash, data []byte) { + size := uint64(len(data)) + var args []uint256.Int + args = append(args, *uint256.NewInt(0)) + args = append(args, *uint256.NewInt(size)) + for _, topic := range topics { + args = append(args, HashToUint256(topic)) + } + memory := vm.NewMemory() + memory.Resize(size) + memory.Set(0, size, data) + scope := &vm.ScopeContext{ + Memory: memory, + Stack: TracingStackFromArgs(args...), + Contract: info.Contract, + } + logType := fmt.Sprintf("LOG%d", len(topics)) + info.Tracer.CaptureState(0, vm.StringToOp(logType), 0, 0, scope, []byte{}, info.Depth, nil) +} + func (info *TracingInfo) RecordStorageGet(key common.Hash) { tracer := info.Tracer if info.Scenario == TracingDuringEVM { diff --git a/system_tests/program_test.go b/system_tests/program_test.go index d8d9e05aa1..1687d74f04 100644 --- a/system_tests/program_test.go +++ b/system_tests/program_test.go @@ -18,6 +18,7 @@ import ( "github.com/ethereum/go-ethereum" "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" @@ -641,10 +642,15 @@ func testReturnData(t *testing.T, jit bool) { func TestProgramLogs(t *testing.T) { t.Parallel() - testLogs(t, true) + testLogs(t, true, false) } -func testLogs(t *testing.T, jit bool) { +func TestProgramLogsWithTracing(t *testing.T) { + t.Parallel() + testLogs(t, true, true) +} + +func testLogs(t *testing.T, jit, tracing bool) { builder, auth, cleanup := setupProgramTest(t, jit) ctx := builder.ctx l2info := builder.L2Info @@ -653,6 +659,27 @@ func testLogs(t *testing.T, jit bool) { logAddr := deployWasm(t, ctx, auth, l2client, rustFile("log")) multiAddr := deployWasm(t, ctx, auth, l2client, rustFile("multicall")) + type traceLog struct { + Address common.Address `json:"address"` + Topics []common.Hash `json:"topics"` + Data hexutil.Bytes `json:"data"` + } + traceTx := func(tx *types.Transaction) []traceLog { + type traceLogs struct { + Logs []traceLog `json:"logs"` + } + var trace traceLogs + traceConfig := 
map[string]interface{}{ + "tracer": "callTracer", + "tracerConfig": map[string]interface{}{ + "withLog": true, + }, + } + rpc := l2client.Client() + err := rpc.CallContext(ctx, &trace, "debug_traceTransaction", tx.Hash(), traceConfig) + Require(t, err) + return trace.Logs + } ensure := func(tx *types.Transaction, err error) *types.Receipt { t.Helper() Require(t, err) @@ -679,6 +706,20 @@ func testLogs(t *testing.T, jit bool) { topics[j] = testhelpers.RandomHash() } data := randBytes(0, 48) + verifyLogTopicsAndData := func(logData []byte, logTopics []common.Hash) { + if !bytes.Equal(logData, data) { + Fatal(t, "data mismatch", logData, data) + } + if len(logTopics) != len(topics) { + Fatal(t, "topics mismatch", len(logTopics), len(topics)) + } + for j := 0; j < i; j++ { + if logTopics[j] != topics[j] { + Fatal(t, "topic mismatch", logTopics, topics) + } + } + } + args := encode(topics, data) tx := l2info.PrepareTxTo("Owner", &logAddr, 1e9, nil, args) receipt := ensure(tx, l2client.SendTransaction(ctx, tx)) @@ -687,16 +728,14 @@ func testLogs(t *testing.T, jit bool) { Fatal(t, "wrong number of logs", len(receipt.Logs)) } log := receipt.Logs[0] - if !bytes.Equal(log.Data, data) { - Fatal(t, "data mismatch", log.Data, data) - } - if len(log.Topics) != len(topics) { - Fatal(t, "topics mismatch", len(log.Topics), len(topics)) - } - for j := 0; j < i; j++ { - if log.Topics[j] != topics[j] { - Fatal(t, "topic mismatch", log.Topics, topics) + verifyLogTopicsAndData(log.Data, log.Topics) + if tracing { + logs := traceTx(tx) + if len(logs) != 1 { + Fatal(t, "wrong number of logs in trace", len(logs)) } + log := logs[0] + verifyLogTopicsAndData(log.Data, log.Topics) } } From 66e732e8b0a2bfe2062fc875810f64d36ba29215 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Tue, 2 Jul 2024 16:39:15 -0500 Subject: [PATCH 34/72] address PR comments --- arbnode/batch_poster.go | 57 +++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index a1bae7b6da..b6020c6dac 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -1143,6 +1143,8 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) var l1BoundMaxTimestamp uint64 = math.MaxUint64 var l1BoundMinBlockNumber uint64 var l1BoundMinTimestamp uint64 + var l1BoundMinBlockNumberWithBypass uint64 + var l1BoundMinTimestampWithBypass uint64 hasL1Bound := config.l1BlockBound != l1BlockBoundIgnore if hasL1Bound { var l1Bound *types.Header @@ -1187,17 +1189,19 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) l1BoundMaxBlockNumber = arbmath.SaturatingUAdd(l1BoundBlockNumber, arbmath.BigToUintSaturating(maxTimeVariationFutureBlocks)) l1BoundMaxTimestamp = arbmath.SaturatingUAdd(l1Bound.Time, arbmath.BigToUintSaturating(maxTimeVariationFutureSeconds)) + latestHeader, err := b.l1Reader.LastHeader(ctx) + if err != nil { + return false, err + } + latestBlockNumber := arbutil.ParentHeaderToL1BlockNumber(latestHeader) + l1BoundMinBlockNumber = arbmath.SaturatingUSub(latestBlockNumber, arbmath.BigToUintSaturating(maxTimeVariationDelayBlocks)) + l1BoundMinTimestamp = arbmath.SaturatingUSub(latestHeader.Time, arbmath.BigToUintSaturating(maxTimeVariationDelaySeconds)) + if config.L1BlockBoundBypass > 0 { - latestHeader, err := b.l1Reader.LastHeader(ctx) - if err != nil { - return false, err - } - latestBlockNumber := arbutil.ParentHeaderToL1BlockNumber(latestHeader) blockNumberWithPadding := 
arbmath.SaturatingUAdd(latestBlockNumber, uint64(config.L1BlockBoundBypass/ethPosBlockTime)) timestampWithPadding := arbmath.SaturatingUAdd(latestHeader.Time, uint64(config.L1BlockBoundBypass/time.Second)) - - l1BoundMinBlockNumber = arbmath.SaturatingUSub(blockNumberWithPadding, arbmath.BigToUintSaturating(maxTimeVariationDelayBlocks)) - l1BoundMinTimestamp = arbmath.SaturatingUSub(timestampWithPadding, arbmath.BigToUintSaturating(maxTimeVariationDelaySeconds)) + l1BoundMinBlockNumberWithBypass = arbmath.SaturatingUSub(blockNumberWithPadding, arbmath.BigToUintSaturating(maxTimeVariationDelayBlocks)) + l1BoundMinTimestampWithBypass = arbmath.SaturatingUSub(timestampWithPadding, arbmath.BigToUintSaturating(maxTimeVariationDelaySeconds)) } } @@ -1207,13 +1211,14 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) log.Error("error getting message from streamer", "error", err) break } - if msg.Message.Header.BlockNumber < l1BoundMinBlockNumber || msg.Message.Header.Timestamp < l1BoundMinTimestamp { + if msg.Message.Header.BlockNumber < l1BoundMinBlockNumberWithBypass || msg.Message.Header.Timestamp < l1BoundMinTimestampWithBypass { log.Error( "disabling L1 bound as batch posting message is close to the maximum delay", "blockNumber", msg.Message.Header.BlockNumber, - "l1BoundMinBlockNumber", l1BoundMinBlockNumber, + "l1BoundMinBlockNumberWithBypass", l1BoundMinBlockNumberWithBypass, "timestamp", msg.Message.Header.Timestamp, - "l1BoundMinTimestamp", l1BoundMinTimestamp, + "l1BoundMinTimestampWithBypass", l1BoundMinTimestampWithBypass, + "l1BlockBoundBypass", config.L1BlockBoundBypass, ) l1BoundMaxBlockNumber = math.MaxUint64 l1BoundMaxTimestamp = math.MaxUint64 @@ -1249,20 +1254,22 @@ func (b *BatchPoster) maybePostSequencerBatch(ctx context.Context) (bool, error) b.building.msgCount++ } - firstMsgTimeStamp := firstMsg.Message.Header.Timestamp - firstMsgBlockNumber := firstMsg.Message.Header.BlockNumber - batchNearL1BoundMinTimestamp := firstMsgTimeStamp <= l1BoundMinTimestamp+uint64(config.ReorgResistanceMargin/time.Second) - batchNearL1BoundMinBlockNumber := firstMsgBlockNumber <= l1BoundMinBlockNumber+uint64(config.ReorgResistanceMargin/ethPosBlockTime) - if config.ReorgResistanceMargin > 0 && (batchNearL1BoundMinTimestamp || batchNearL1BoundMinBlockNumber) { - log.Error( - "Disabling batch posting due to batch being within reorg resistance margin from layer 1 minimum block or timestamp bounds", - "reorgResistanceMargin", config.ReorgResistanceMargin, - "firstMsgTimeStamp", firstMsgTimeStamp, - "l1BoundMinTimestamp", l1BoundMinTimestamp, - "firstMsgBlockNumber", firstMsgBlockNumber, - "l1BoundMinBlockNumber", l1BoundMinBlockNumber, - ) - return false, errors.New("batch is within reorg resistance margin from layer 1 minimum block or timestamp bounds") + if hasL1Bound && config.ReorgResistanceMargin > 0 { + firstMsgBlockNumber := firstMsg.Message.Header.BlockNumber + firstMsgTimeStamp := firstMsg.Message.Header.Timestamp + batchNearL1BoundMinBlockNumber := firstMsgBlockNumber <= arbmath.SaturatingUAdd(l1BoundMinBlockNumber, uint64(config.ReorgResistanceMargin/ethPosBlockTime)) + batchNearL1BoundMinTimestamp := firstMsgTimeStamp <= arbmath.SaturatingUAdd(l1BoundMinTimestamp, uint64(config.ReorgResistanceMargin/time.Second)) + if batchNearL1BoundMinTimestamp || batchNearL1BoundMinBlockNumber { + log.Error( + "Disabling batch posting due to batch being within reorg resistance margin from layer 1 minimum block or timestamp bounds", + "reorgResistanceMargin", 
config.ReorgResistanceMargin, + "firstMsgTimeStamp", firstMsgTimeStamp, + "l1BoundMinTimestamp", l1BoundMinTimestamp, + "firstMsgBlockNumber", firstMsgBlockNumber, + "l1BoundMinBlockNumber", l1BoundMinBlockNumber, + ) + return false, errors.New("batch is within reorg resistance margin from layer 1 minimum block or timestamp bounds") + } } if !forcePostBatch || !b.building.haveUsefulMessage { From 4c96aaf8e3f67d604d9f67137c4d22670285e361 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Tue, 2 Jul 2024 16:56:29 -0500 Subject: [PATCH 35/72] fix missed invocation of testLogs in stylus_test --- system_tests/stylus_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system_tests/stylus_test.go b/system_tests/stylus_test.go index 97f3041196..a4bf4d8035 100644 --- a/system_tests/stylus_test.go +++ b/system_tests/stylus_test.go @@ -41,7 +41,7 @@ func TestProgramArbitratorReturnData(t *testing.T) { } func TestProgramArbitratorLogs(t *testing.T) { - testLogs(t, false) + testLogs(t, false, false) } func TestProgramArbitratorCreate(t *testing.T) { From 8b0424b6307bbc4b1974d5492431c6813929bb17 Mon Sep 17 00:00:00 2001 From: Aman Sanghi Date: Wed, 3 Jul 2024 17:48:27 +0530 Subject: [PATCH 36/72] fix --- execution/nodeInterface/virtual-contracts.go | 3 ++- system_tests/estimation_test.go | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/execution/nodeInterface/virtual-contracts.go b/execution/nodeInterface/virtual-contracts.go index d72ad0da8e..d04be10857 100644 --- a/execution/nodeInterface/virtual-contracts.go +++ b/execution/nodeInterface/virtual-contracts.go @@ -141,7 +141,8 @@ func init() { return } posterCost, _ := state.L1PricingState().PosterDataCost(msg, l1pricing.BatchPosterAddress, brotliCompressionLevel) - posterCostInL2Gas := arbos.GetPosterGas(state, header.BaseFee, msg.TxRunMode, posterCost) + // Use estimate mode because this is used to raise the gas cap, so we don't want to underestimate. + posterCostInL2Gas := arbos.GetPosterGas(state, header.BaseFee, core.MessageGasEstimationMode, posterCost) *gascap = arbmath.SaturatingUAdd(*gascap, posterCostInL2Gas) } diff --git a/system_tests/estimation_test.go b/system_tests/estimation_test.go index 0e0b329708..284c709fad 100644 --- a/system_tests/estimation_test.go +++ b/system_tests/estimation_test.go @@ -352,5 +352,7 @@ func TestGasEstimationWithRPCGasLimit(t *testing.T) { testClientB, cleanupB := builder.Build2ndNode(t, &SecondNodeParams{execConfig: execConfigB}) defer cleanupB() _, err = testClientB.Client.EstimateGas(ctx, ethereum.CallMsg{To: &addr}) - Require(t, err) + if err == nil { + Fatal(t, "EstimateGas passed with insufficient gas") + } } From b017e98eb9936fb0f3e12f24250bfaf2f7bc6ab9 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 3 Jul 2024 11:46:04 -0700 Subject: [PATCH 37/72] Add submodule pin check to CI This checks that submodules are ancestors of origin/HEAD so that non-merged branches aren't accidentally checked in. 
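The same check can be run locally before pushing a branch; roughly, and assuming the submodules are initialized and their origin remotes fetched so that origin/HEAD resolves:

    # Fail if any submodule points at a commit that is not an ancestor of its
    # upstream default branch, i.e. a commit that was never merged upstream.
    git submodule update --init --recursive
    git submodule foreach 'git merge-base --is-ancestor HEAD origin/HEAD'

A non-zero exit from merge-base --is-ancestor aborts the foreach, which is what makes the workflow below fail on an unmerged submodule pin.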
--- .github/workflows/submodule-pin-check.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/workflows/submodule-pin-check.yml diff --git a/.github/workflows/submodule-pin-check.yml b/.github/workflows/submodule-pin-check.yml new file mode 100644 index 0000000000..43984f1158 --- /dev/null +++ b/.github/workflows/submodule-pin-check.yml @@ -0,0 +1,15 @@ +name: Merge Checks + +on: + pull_request: + branches: [ master ] + types: [synchronize, opened, reopened, labeled, unlabeled] + +jobs: + submodule-pin-check: + name: Submodule Pin Check + runs-on: ubuntu-latest + steps: + - name: Check all submodules are under origin/HEAD + run: git submodule foreach git merge-base --is-ancestor HEAD origin/HEAD + From b64dc4fcc611592653d7b3f7b032afb0bb780ebf Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 3 Jul 2024 12:02:50 -0700 Subject: [PATCH 38/72] Add checkout step --- .github/workflows/submodule-pin-check.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/submodule-pin-check.yml b/.github/workflows/submodule-pin-check.yml index 43984f1158..050eea4b25 100644 --- a/.github/workflows/submodule-pin-check.yml +++ b/.github/workflows/submodule-pin-check.yml @@ -10,6 +10,9 @@ jobs: name: Submodule Pin Check runs-on: ubuntu-latest steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Check all submodules are under origin/HEAD run: git submodule foreach git merge-base --is-ancestor HEAD origin/HEAD From 9e45391518493f4bfebd0e52e207572b0db9fb8d Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 3 Jul 2024 12:05:41 -0700 Subject: [PATCH 39/72] Check out submodules --- .github/workflows/submodule-pin-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/submodule-pin-check.yml b/.github/workflows/submodule-pin-check.yml index 050eea4b25..86a69c94cd 100644 --- a/.github/workflows/submodule-pin-check.yml +++ b/.github/workflows/submodule-pin-check.yml @@ -12,6 +12,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + submodules: recursive - name: Check all submodules are under origin/HEAD run: git submodule foreach git merge-base --is-ancestor HEAD origin/HEAD From 30870b5a05af710cf56d03208ecd17bb626ae2db Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 3 Jul 2024 12:12:16 -0700 Subject: [PATCH 40/72] Update arbitrator/langs/bf pin --- arbitrator/langs/bf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arbitrator/langs/bf b/arbitrator/langs/bf index 062b87bad1..cb5750580f 160000 --- a/arbitrator/langs/bf +++ b/arbitrator/langs/bf @@ -1 +1 @@ -Subproject commit 062b87bad1ec00d42b9cc2b5ee41e63cd6ff1cbb +Subproject commit cb5750580f6990b5166ffce83de11b766a25ca31 From bbd649e664d4a1c47b0b09986015c6a548ddb12c Mon Sep 17 00:00:00 2001 From: Tristan-Wilson <87238672+Tristan-Wilson@users.noreply.github.com> Date: Wed, 3 Jul 2024 15:55:22 -0700 Subject: [PATCH 41/72] Update .github/workflows/submodule-pin-check.yml Co-authored-by: Gabriel de Quadros Ligneul <8294320+gligneul@users.noreply.github.com> --- .github/workflows/submodule-pin-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/submodule-pin-check.yml b/.github/workflows/submodule-pin-check.yml index 86a69c94cd..ac0b8fba89 100644 --- a/.github/workflows/submodule-pin-check.yml +++ b/.github/workflows/submodule-pin-check.yml @@ -1,4 +1,4 @@ -name: Merge Checks +name: Submodule Pin Check on: pull_request: From c3999026cff51e75226072a300467cdc7f07126a Mon Sep 17 00:00:00 2001 
From: Tristan Wilson Date: Mon, 8 Jul 2024 04:44:35 -0700 Subject: [PATCH 42/72] Remove check for labeled/unlabeled --- .github/workflows/submodule-pin-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/submodule-pin-check.yml b/.github/workflows/submodule-pin-check.yml index ac0b8fba89..b37cc7b9d5 100644 --- a/.github/workflows/submodule-pin-check.yml +++ b/.github/workflows/submodule-pin-check.yml @@ -3,7 +3,7 @@ name: Submodule Pin Check on: pull_request: branches: [ master ] - types: [synchronize, opened, reopened, labeled, unlabeled] + types: [synchronize, opened, reopened] jobs: submodule-pin-check: From 75a88e3c3cf1ca80cc7d33ef7a1723761442ddaa Mon Sep 17 00:00:00 2001 From: Aman Sanghi Date: Mon, 8 Jul 2024 19:50:06 +0530 Subject: [PATCH 43/72] Feed Client should log loud error when rate limited --- broadcastclient/broadcastclient.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/broadcastclient/broadcastclient.go b/broadcastclient/broadcastclient.go index 1167dba133..a0348b7f9c 100644 --- a/broadcastclient/broadcastclient.go +++ b/broadcastclient/broadcastclient.go @@ -25,6 +25,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rpc" "github.com/offchainlabs/nitro/arbutil" m "github.com/offchainlabs/nitro/broadcaster/message" @@ -292,6 +293,11 @@ func (bc *BroadcastClient) connect(ctx context.Context, nextSeqNum arbutil.Messa return nil, err } if err != nil { + var httpError rpc.HTTPError + if errors.As(err, &httpError) { + if httpError.StatusCode == 429 { + log.Error("rate limit exceeded, please run own local relay because too many nodes are connecting to feed from same IP address", "err", err) + } return nil, fmt.Errorf("broadcast client unable to connect: %w", err) } if config.RequireChainId && !foundChainId { From c1284529b82c552118fa59a8c6441fc6e316c46c Mon Sep 17 00:00:00 2001 From: Aman Sanghi Date: Mon, 8 Jul 2024 20:03:35 +0530 Subject: [PATCH 44/72] fix --- broadcastclient/broadcastclient.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/broadcastclient/broadcastclient.go b/broadcastclient/broadcastclient.go index a0348b7f9c..d4135ed75f 100644 --- a/broadcastclient/broadcastclient.go +++ b/broadcastclient/broadcastclient.go @@ -294,10 +294,9 @@ func (bc *BroadcastClient) connect(ctx context.Context, nextSeqNum arbutil.Messa } if err != nil { var httpError rpc.HTTPError - if errors.As(err, &httpError) { - if httpError.StatusCode == 429 { - log.Error("rate limit exceeded, please run own local relay because too many nodes are connecting to feed from same IP address", "err", err) - } + if errors.As(err, &httpError) && httpError.StatusCode == 429 { + log.Error("rate limit exceeded, please run own local relay because too many nodes are connecting to feed from same IP address", "err", err) + } return nil, fmt.Errorf("broadcast client unable to connect: %w", err) } if config.RequireChainId && !foundChainId { From a1d1fc464203a2b7ab071e9ec281a9e2145f8b52 Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Mon, 8 Jul 2024 10:01:44 -0500 Subject: [PATCH 45/72] update option usage description --- arbnode/batch_poster.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arbnode/batch_poster.go b/arbnode/batch_poster.go index 9a8a7667a6..2617a9a629 100644 --- a/arbnode/batch_poster.go +++ b/arbnode/batch_poster.go @@ -220,7 +220,7 @@ func BatchPosterConfigAddOptions(prefix string, f *pflag.FlagSet) { 
f.Duration(prefix+".l1-block-bound-bypass", DefaultBatchPosterConfig.L1BlockBoundBypass, "post batches even if not within the layer 1 future bounds if we're within this margin of the max delay") f.Bool(prefix+".use-access-lists", DefaultBatchPosterConfig.UseAccessLists, "post batches with access lists to reduce gas usage (disabled for L3s)") f.Uint64(prefix+".gas-estimate-base-fee-multiple-bips", uint64(DefaultBatchPosterConfig.GasEstimateBaseFeeMultipleBips), "for gas estimation, use this multiple of the basefee (measured in basis points) as the max fee per gas") - f.Duration(prefix+".reorg-resistance-margin", DefaultBatchPosterConfig.ReorgResistanceMargin, "do not post batch if its within this duration from layer 1 minimum bounds ") + f.Duration(prefix+".reorg-resistance-margin", DefaultBatchPosterConfig.ReorgResistanceMargin, "do not post batch if its within this duration from layer 1 minimum bounds. Requires l1-block-bound option not be set to \"ignore\"") redislock.AddConfigOptions(prefix+".redis-lock", f) dataposter.DataPosterConfigAddOptions(prefix+".data-poster", f, dataposter.DefaultDataPosterConfig) genericconf.WalletConfigAddOptions(prefix+".parent-chain-wallet", f, DefaultBatchPosterConfig.ParentChainWallet.Pathname) From 2a09c3e1a65bbc6c3b8319efaaccfd543dfd0cbb Mon Sep 17 00:00:00 2001 From: Raul Jordan Date: Mon, 8 Jul 2024 10:20:30 -0500 Subject: [PATCH 46/72] feedback --- system_tests/validation_mock_test.go | 12 +++- validator/server_arb/execution_run_test.go | 81 +++++++++++----------- 2 files changed, 52 insertions(+), 41 deletions(-) diff --git a/system_tests/validation_mock_test.go b/system_tests/validation_mock_test.go index e61fd407ec..1330f24882 100644 --- a/system_tests/validation_mock_test.go +++ b/system_tests/validation_mock_test.go @@ -127,7 +127,17 @@ func (r *mockExecRun) GetStepAt(position uint64) containers.PromiseInterface[*va } func (r *mockExecRun) GetMachineHashesWithStepSize(machineStartIndex, stepSize, maxIterations uint64) containers.PromiseInterface[[]common.Hash] { - return containers.NewReadyPromise[[]common.Hash](nil, nil) + ctx := context.Background() + hashes := make([]common.Hash, 0) + for i := uint64(0); i < maxIterations; i++ { + absoluteMachineIndex := machineStartIndex + stepSize*(i+1) + stepResult, err := r.GetStepAt(absoluteMachineIndex).Await(ctx) + if err != nil { + return containers.NewReadyPromise[[]common.Hash](nil, err) + } + hashes = append(hashes, stepResult.Hash) + } + return containers.NewReadyPromise[[]common.Hash](hashes, nil) } func (r *mockExecRun) GetLastStep() containers.PromiseInterface[*validator.MachineStepResult] { diff --git a/validator/server_arb/execution_run_test.go b/validator/server_arb/execution_run_test.go index bb148b5d98..bdc1eefc4d 100644 --- a/validator/server_arb/execution_run_test.go +++ b/validator/server_arb/execution_run_test.go @@ -4,7 +4,6 @@ import ( "context" "strings" "testing" - "time" "github.com/ethereum/go-ethereum/common" "github.com/offchainlabs/nitro/validator" @@ -64,40 +63,41 @@ func (m *mockMachine) Freeze() {} func (m *mockMachine) Destroy() {} func Test_machineHashesWithStep(t *testing.T) { - mm := &mockMachine{} - e := &executionRun{} - ctx := context.Background() - t.Run("basic argument checks", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + e := &executionRun{} machStartIndex := uint64(0) stepSize := uint64(0) maxIterations := uint64(0) _, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) - if 
!strings.Contains(err.Error(), "step size cannot be 0") { + if err == nil || !strings.Contains(err.Error(), "step size cannot be 0") { t.Error("Wrong error") } stepSize = uint64(1) _, err = e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) - if !strings.Contains(err.Error(), "number of iterations cannot be 0") { + if err == nil || !strings.Contains(err.Error(), "number of iterations cannot be 0") { t.Error("Wrong error") } }) t.Run("machine at start index 0 hash is the finished state hash", func(t *testing.T) { - mm.gs = validator.GoGlobalState{ - Batch: 1, + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + mm := &mockMachine{ + gs: validator.GoGlobalState{ + Batch: 1, + }, + totalSteps: 20, } machStartIndex := uint64(0) stepSize := uint64(1) maxIterations := uint64(1) - e.cache = &MachineCache{ - buildingLock: make(chan struct{}, 1), - machines: []MachineInterface{mm}, - finalMachine: mm, - } - go func() { - <-time.After(time.Millisecond * 50) - e.cache.buildingLock <- struct{}{} - }() + e := &executionRun{ + cache: NewMachineCache(ctx, func(_ context.Context) (MachineInterface, error) { + return mm, nil + }, &DefaultMachineCacheConfig), + } + hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) if err != nil { t.Fatal(err) @@ -111,24 +111,24 @@ func Test_machineHashesWithStep(t *testing.T) { } }) t.Run("can step in step size increments and collect hashes", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() initialGs := validator.GoGlobalState{ Batch: 1, PosInBatch: 0, } - mm.gs = initialGs - mm.totalSteps = 20 + mm := &mockMachine{ + gs: initialGs, + totalSteps: 20, + } machStartIndex := uint64(0) stepSize := uint64(5) maxIterations := uint64(4) - e.cache = &MachineCache{ - buildingLock: make(chan struct{}, 1), - machines: []MachineInterface{mm}, - finalMachine: mm, - } - go func() { - <-time.After(time.Millisecond * 50) - e.cache.buildingLock <- struct{}{} - }() + e := &executionRun{ + cache: NewMachineCache(ctx, func(_ context.Context) (MachineInterface, error) { + return mm, nil + }, &DefaultMachineCacheConfig), + } hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) if err != nil { t.Fatal(err) @@ -155,24 +155,25 @@ func Test_machineHashesWithStep(t *testing.T) { } }) t.Run("if finishes execution early, can return a smaller number of hashes than the expected max iterations", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() initialGs := validator.GoGlobalState{ Batch: 1, PosInBatch: 0, } - mm.gs = initialGs - mm.totalSteps = 20 + mm := &mockMachine{ + gs: initialGs, + totalSteps: 20, + } machStartIndex := uint64(0) stepSize := uint64(5) maxIterations := uint64(10) - e.cache = &MachineCache{ - buildingLock: make(chan struct{}, 1), - machines: []MachineInterface{mm}, - finalMachine: mm, - } - go func() { - <-time.After(time.Millisecond * 50) - e.cache.buildingLock <- struct{}{} - }() + e := &executionRun{ + cache: NewMachineCache(ctx, func(_ context.Context) (MachineInterface, error) { + return mm, nil + }, &DefaultMachineCacheConfig), + } + hashes, err := e.machineHashesWithStepSize(ctx, machStartIndex, stepSize, maxIterations) if err != nil { t.Fatal(err) From 80cf3d424f1cb567d96a7cf6fb866b4917e7fe4c Mon Sep 17 00:00:00 2001 From: Ganesh Vanahalli Date: Mon, 8 Jul 2024 10:52:29 -0500 Subject: [PATCH 47/72] add descriptive comments to stack args --- arbos/util/tracing.go | 6 
+++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arbos/util/tracing.go b/arbos/util/tracing.go index ed5acb33b2..f0f101bc20 100644 --- a/arbos/util/tracing.go +++ b/arbos/util/tracing.go @@ -51,10 +51,10 @@ func NewTracingInfo(evm *vm.EVM, from, to common.Address, scenario TracingScenar func (info *TracingInfo) RecordEmitLog(topics []common.Hash, data []byte) { size := uint64(len(data)) var args []uint256.Int - args = append(args, *uint256.NewInt(0)) - args = append(args, *uint256.NewInt(size)) + args = append(args, *uint256.NewInt(0)) // offset: byte offset in the memory in bytes + args = append(args, *uint256.NewInt(size)) // size: byte size to copy (length of data) for _, topic := range topics { - args = append(args, HashToUint256(topic)) + args = append(args, HashToUint256(topic)) // topic: 32-byte value. Max topics count is 4 } memory := vm.NewMemory() memory.Resize(size) From fc3402e43f3f5975fba15c26ed7d114bd09822c3 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Tue, 9 Jul 2024 11:25:36 +0200 Subject: [PATCH 48/72] Support different branches for submodule pin check --- .github/workflows/submodule-pin-check.sh | 22 ++++++++++++++++++++++ .github/workflows/submodule-pin-check.yml | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100755 .github/workflows/submodule-pin-check.sh diff --git a/.github/workflows/submodule-pin-check.sh b/.github/workflows/submodule-pin-check.sh new file mode 100755 index 0000000000..a246e2e3e9 --- /dev/null +++ b/.github/workflows/submodule-pin-check.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +declare -Ar exceptions=( + [contracts]=origin/develop + [nitro-testnode]=origin/master +) + +divergent=0 +for mod in `git submodule --quiet foreach 'echo $name'`; do + branch=origin/HEAD + if [[ -v exceptions[$mod] ]]; then + branch=${exceptions[$mod]} + fi + + if ! 
git -C $mod merge-base --is-ancestor HEAD $branch; then + echo $mod diverges from $branch + divergent=1 + fi +done + +exit $divergent + diff --git a/.github/workflows/submodule-pin-check.yml b/.github/workflows/submodule-pin-check.yml index b37cc7b9d5..56dd2148f8 100644 --- a/.github/workflows/submodule-pin-check.yml +++ b/.github/workflows/submodule-pin-check.yml @@ -16,5 +16,5 @@ jobs: submodules: recursive - name: Check all submodules are under origin/HEAD - run: git submodule foreach git merge-base --is-ancestor HEAD origin/HEAD + run: ${{ github.workspace }}/.github/workflows/submodule-pin-check.sh From 919b73742cfd0c01faf7afee40d761a2c11b981c Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Tue, 9 Jul 2024 11:30:41 +0200 Subject: [PATCH 49/72] Fetch submodule branches --- .github/workflows/submodule-pin-check.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/submodule-pin-check.yml b/.github/workflows/submodule-pin-check.yml index 56dd2148f8..e459bad34d 100644 --- a/.github/workflows/submodule-pin-check.yml +++ b/.github/workflows/submodule-pin-check.yml @@ -13,8 +13,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 with: + fetch-depth: 0 submodules: recursive - - name: Check all submodules are under origin/HEAD + - name: Check all submodules are ancestors of origin/HEAD or configured branch run: ${{ github.workspace }}/.github/workflows/submodule-pin-check.sh From c205b67f99e9cd6f038f521cf74a5bae98661f4c Mon Sep 17 00:00:00 2001 From: Aman Sanghi Date: Tue, 9 Jul 2024 16:17:27 +0530 Subject: [PATCH 50/72] Changes based on PR comments --- broadcastclient/broadcastclient.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/broadcastclient/broadcastclient.go b/broadcastclient/broadcastclient.go index d4135ed75f..2225341560 100644 --- a/broadcastclient/broadcastclient.go +++ b/broadcastclient/broadcastclient.go @@ -25,8 +25,6 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" - "github.com/ethereum/go-ethereum/rpc" - "github.com/offchainlabs/nitro/arbutil" m "github.com/offchainlabs/nitro/broadcaster/message" "github.com/offchainlabs/nitro/util/contracts" @@ -293,8 +291,8 @@ func (bc *BroadcastClient) connect(ctx context.Context, nextSeqNum arbutil.Messa return nil, err } if err != nil { - var httpError rpc.HTTPError - if errors.As(err, &httpError) && httpError.StatusCode == 429 { + connectionRejectedError := &ws.ConnectionRejectedError{} + if errors.As(err, &connectionRejectedError) && connectionRejectedError.StatusCode() == 429 { log.Error("rate limit exceeded, please run own local relay because too many nodes are connecting to feed from same IP address", "err", err) } return nil, fmt.Errorf("broadcast client unable to connect: %w", err) From 1cb907ca700ffdd98408b14a5891bea9aa117afe Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Tue, 9 Jul 2024 09:51:53 -0600 Subject: [PATCH 51/72] Drop nitro p2p config options --- cmd/nitro-val/config.go | 3 --- cmd/nitro-val/nitro_val.go | 1 - cmd/nitro/nitro.go | 4 ---- 3 files changed, 8 deletions(-) diff --git a/cmd/nitro-val/config.go b/cmd/nitro-val/config.go index b52a1c6b5e..2adbe5e9aa 100644 --- a/cmd/nitro-val/config.go +++ b/cmd/nitro-val/config.go @@ -27,7 +27,6 @@ type ValidationNodeConfig struct { HTTP genericconf.HTTPConfig `koanf:"http"` WS genericconf.WSConfig `koanf:"ws"` IPC genericconf.IPCConfig `koanf:"ipc"` - P2P genericconf.P2PConfig `koanf:"p2p"` Auth genericconf.AuthRPCConfig `koanf:"auth"` Metrics bool 
`koanf:"metrics"` MetricsServer genericconf.MetricsServerConfig `koanf:"metrics-server"` @@ -67,7 +66,6 @@ var ValidationNodeConfigDefault = ValidationNodeConfig{ HTTP: HTTPConfigDefault, WS: WSConfigDefault, IPC: IPCConfigDefault, - P2P: genericconf.P2PConfigDefault, Auth: genericconf.AuthRPCConfigDefault, Metrics: false, MetricsServer: genericconf.MetricsServerConfigDefault, @@ -87,7 +85,6 @@ func ValidationNodeConfigAddOptions(f *flag.FlagSet) { genericconf.WSConfigAddOptions("ws", f) genericconf.IPCConfigAddOptions("ipc", f) genericconf.AuthRPCConfigAddOptions("auth", f) - genericconf.P2PConfigAddOptions("p2p", f) f.Bool("metrics", ValidationNodeConfigDefault.Metrics, "enable metrics") genericconf.MetricsServerAddOptions("metrics-server", f) f.Bool("pprof", ValidationNodeConfigDefault.PProf, "enable pprof") diff --git a/cmd/nitro-val/nitro_val.go b/cmd/nitro-val/nitro_val.go index 1e894336ea..6f5f546430 100644 --- a/cmd/nitro-val/nitro_val.go +++ b/cmd/nitro-val/nitro_val.go @@ -70,7 +70,6 @@ func mainImpl() int { nodeConfig.WS.Apply(&stackConf) nodeConfig.Auth.Apply(&stackConf) nodeConfig.IPC.Apply(&stackConf) - nodeConfig.P2P.Apply(&stackConf) vcsRevision, strippedRevision, vcsTime := confighelpers.GetVersion() stackConf.Version = strippedRevision diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 1c4ad80186..572e6d2f06 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -183,7 +183,6 @@ func mainImpl() int { if nodeConfig.WS.ExposeAll { stackConf.WSModules = append(stackConf.WSModules, "personal") } - nodeConfig.P2P.Apply(&stackConf) vcsRevision, strippedRevision, vcsTime := confighelpers.GetVersion() stackConf.Version = strippedRevision @@ -717,7 +716,6 @@ type NodeConfig struct { IPC genericconf.IPCConfig `koanf:"ipc"` Auth genericconf.AuthRPCConfig `koanf:"auth"` GraphQL genericconf.GraphQLConfig `koanf:"graphql"` - P2P genericconf.P2PConfig `koanf:"p2p"` Metrics bool `koanf:"metrics"` MetricsServer genericconf.MetricsServerConfig `koanf:"metrics-server"` PProf bool `koanf:"pprof"` @@ -743,7 +741,6 @@ var NodeConfigDefault = NodeConfig{ IPC: genericconf.IPCConfigDefault, Auth: genericconf.AuthRPCConfigDefault, GraphQL: genericconf.GraphQLConfigDefault, - P2P: genericconf.P2PConfigDefault, Metrics: false, MetricsServer: genericconf.MetricsServerConfigDefault, Init: conf.InitConfigDefault, @@ -768,7 +765,6 @@ func NodeConfigAddOptions(f *flag.FlagSet) { genericconf.WSConfigAddOptions("ws", f) genericconf.IPCConfigAddOptions("ipc", f) genericconf.AuthRPCConfigAddOptions("auth", f) - genericconf.P2PConfigAddOptions("p2p", f) genericconf.GraphQLConfigAddOptions("graphql", f) f.Bool("metrics", NodeConfigDefault.Metrics, "enable metrics") genericconf.MetricsServerAddOptions("metrics-server", f) From 77ed71604595198438c444fbd2d1b91176200c07 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Tue, 9 Jul 2024 13:38:29 -0600 Subject: [PATCH 52/72] Drop P2PConfig struct --- cmd/genericconf/server.go | 61 --------------------------------------- 1 file changed, 61 deletions(-) diff --git a/cmd/genericconf/server.go b/cmd/genericconf/server.go index 9b8acd5f71..0b80b74594 100644 --- a/cmd/genericconf/server.go +++ b/cmd/genericconf/server.go @@ -8,9 +8,7 @@ import ( flag "github.com/spf13/pflag" - "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/node" - "github.com/ethereum/go-ethereum/p2p/enode" ) type HTTPConfig struct { @@ -189,65 +187,6 @@ func AuthRPCConfigAddOptions(prefix string, f *flag.FlagSet) { f.StringSlice(prefix+".api", 
AuthRPCConfigDefault.API, "APIs offered over the AUTH-RPC interface") } -type P2PConfig struct { - ListenAddr string `koanf:"listen-addr"` - NoDial bool `koanf:"no-dial"` - NoDiscovery bool `koanf:"no-discovery"` - MaxPeers int `koanf:"max-peers"` - DiscoveryV5 bool `koanf:"discovery-v5"` - DiscoveryV4 bool `koanf:"discovery-v4"` - Bootnodes []string `koanf:"bootnodes"` - BootnodesV5 []string `koanf:"bootnodes-v5"` -} - -func (p P2PConfig) Apply(stackConf *node.Config) { - stackConf.P2P.ListenAddr = p.ListenAddr - stackConf.P2P.NoDial = p.NoDial - stackConf.P2P.NoDiscovery = p.NoDiscovery - stackConf.P2P.MaxPeers = p.MaxPeers - stackConf.P2P.DiscoveryV5 = p.DiscoveryV5 - stackConf.P2P.DiscoveryV4 = p.DiscoveryV4 - stackConf.P2P.BootstrapNodes = parseBootnodes(p.Bootnodes) - stackConf.P2P.BootstrapNodesV5 = parseBootnodes(p.BootnodesV5) -} - -func parseBootnodes(urls []string) []*enode.Node { - nodes := make([]*enode.Node, 0, len(urls)) - for _, url := range urls { - if url != "" { - node, err := enode.Parse(enode.ValidSchemes, url) - if err != nil { - log.Crit("Bootstrap URL invalid", "enode", url, "err", err) - return nil - } - nodes = append(nodes, node) - } - } - return nodes -} - -var P2PConfigDefault = P2PConfig{ - ListenAddr: "", - NoDial: true, - NoDiscovery: true, - MaxPeers: 50, - DiscoveryV5: false, - DiscoveryV4: false, - Bootnodes: []string{}, - BootnodesV5: []string{}, -} - -func P2PConfigAddOptions(prefix string, f *flag.FlagSet) { - f.String(prefix+".listen-addr", P2PConfigDefault.ListenAddr, "P2P listen address") - f.Bool(prefix+".no-dial", P2PConfigDefault.NoDial, "P2P no dial") - f.Bool(prefix+".no-discovery", P2PConfigDefault.NoDiscovery, "P2P no discovery") - f.Int(prefix+".max-peers", P2PConfigDefault.MaxPeers, "P2P max peers") - f.Bool(prefix+".discovery-v5", P2PConfigDefault.DiscoveryV5, "P2P discovery v5") - f.Bool(prefix+".discovery-v4", P2PConfigDefault.DiscoveryV4, "P2P discovery v4") - f.StringSlice(prefix+".bootnodes", P2PConfigDefault.Bootnodes, "P2P bootnodes") - f.StringSlice(prefix+".bootnodes-v5", P2PConfigDefault.BootnodesV5, "P2P bootnodes v5") -} - type MetricsServerConfig struct { Addr string `koanf:"addr"` Port int `koanf:"port"` From c8307db86c6b264fbc3a1f3509cd7faaeb4e5eef Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg Date: Tue, 9 Jul 2024 16:27:36 -0600 Subject: [PATCH 53/72] export redis-url for valnode --- validator/valnode/redis/consumer.go | 1 + 1 file changed, 1 insertion(+) diff --git a/validator/valnode/redis/consumer.go b/validator/valnode/redis/consumer.go index 016f30bd61..84f597c095 100644 --- a/validator/valnode/redis/consumer.go +++ b/validator/valnode/redis/consumer.go @@ -149,6 +149,7 @@ var TestValidationServerConfig = ValidationServerConfig{ func ValidationServerConfigAddOptions(prefix string, f *pflag.FlagSet) { pubsub.ConsumerConfigAddOptions(prefix+".consumer-config", f) f.StringSlice(prefix+".module-roots", nil, "Supported module root hashes") + f.String(prefix+".redis-url", DefaultValidationServerConfig.RedisURL, "url of redis server") f.Duration(prefix+".stream-timeout", DefaultValidationServerConfig.StreamTimeout, "Timeout on polling for existence of redis streams") } From ce25064af57a4999f3854edc926454ef9e4da052 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Tue, 9 Jul 2024 19:10:57 -0600 Subject: [PATCH 54/72] Add consensus-v31 to Dockerfile --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 37c1020a42..634500f8a5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -205,6 +205,7 @@ 
COPY ./scripts/download-machine.sh . #RUN ./download-machine.sh consensus-v11.1 0x68e4fe5023f792d4ef584796c84d710303a5e12ea02d6e37e2b5e9c4332507c4 #RUN ./download-machine.sh consensus-v20 0x8b104a2e80ac6165dc58b9048de12f301d70b02a0ab51396c22b4b4b802a16a4 RUN ./download-machine.sh consensus-v30 0xb0de9cb89e4d944ae6023a3b62276e54804c242fd8c4c2d8e6cc4450f5fa8b1b && true +RUN ./download-machine.sh consensus-v31 0x260f5fa5c3176a856893642e149cf128b5a8de9f828afec8d11184415dd8dc69 FROM golang:1.21.10-bookworm as node-builder WORKDIR /workspace From 2ffb416b64682151412e405a4ab9e7ece016f414 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Tue, 9 Jul 2024 19:12:46 -0600 Subject: [PATCH 55/72] Uppercase AS keyword in Dockerfile --- Dockerfile | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Dockerfile b/Dockerfile index 37c1020a42..91c1f46250 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM debian:bookworm-slim as brotli-wasm-builder +FROM debian:bookworm-slim AS brotli-wasm-builder WORKDIR /workspace RUN apt-get update && \ apt-get install -y cmake make git lbzip2 python3 xz-utils && \ @@ -10,10 +10,10 @@ COPY scripts/build-brotli.sh scripts/ COPY brotli brotli RUN cd emsdk && . ./emsdk_env.sh && cd .. && ./scripts/build-brotli.sh -w -t /workspace/install/ -FROM scratch as brotli-wasm-export +FROM scratch AS brotli-wasm-export COPY --from=brotli-wasm-builder /workspace/install/ / -FROM debian:bookworm-slim as brotli-library-builder +FROM debian:bookworm-slim AS brotli-library-builder WORKDIR /workspace COPY scripts/build-brotli.sh scripts/ COPY brotli brotli @@ -21,10 +21,10 @@ RUN apt-get update && \ apt-get install -y cmake make gcc git && \ ./scripts/build-brotli.sh -l -t /workspace/install/ -FROM scratch as brotli-library-export +FROM scratch AS brotli-library-export COPY --from=brotli-library-builder /workspace/install/ / -FROM node:18-bookworm-slim as contracts-builder +FROM node:18-bookworm-slim AS contracts-builder RUN apt-get update && \ apt-get install -y git python3 make g++ curl RUN curl -L https://foundry.paradigm.xyz | bash && . ~/.bashrc && ~/.foundry/bin/foundryup @@ -35,11 +35,11 @@ COPY contracts contracts/ COPY Makefile . RUN . ~/.bashrc && NITRO_BUILD_IGNORE_TIMESTAMPS=1 make build-solidity -FROM debian:bookworm-20231218 as wasm-base +FROM debian:bookworm-20231218 AS wasm-base WORKDIR /workspace RUN apt-get update && apt-get install -y curl build-essential=12.9 -FROM wasm-base as wasm-libs-builder +FROM wasm-base AS wasm-libs-builder # clang / lld used by soft-float wasm RUN apt-get update && \ apt-get install -y clang=1:14.0-55.7~deb12u1 lld=1:14.0-55.7~deb12u1 wabt @@ -59,10 +59,10 @@ COPY --from=brotli-wasm-export / target/ RUN apt-get update && apt-get install -y cmake RUN . 
~/.cargo/env && NITRO_BUILD_IGNORE_TIMESTAMPS=1 RUSTFLAGS='-C symbol-mangling-version=v0' make build-wasm-libs -FROM scratch as wasm-libs-export +FROM scratch AS wasm-libs-export COPY --from=wasm-libs-builder /workspace/ / -FROM wasm-base as wasm-bin-builder +FROM wasm-base AS wasm-bin-builder # pinned go version RUN curl -L https://golang.org/dl/go1.21.10.linux-`dpkg --print-architecture`.tar.gz | tar -C /usr/local -xzf - COPY ./Makefile ./go.mod ./go.sum ./ @@ -91,7 +91,7 @@ COPY --from=contracts-builder workspace/contracts/node_modules/@offchainlabs/upg COPY --from=contracts-builder workspace/.make/ .make/ RUN PATH="$PATH:/usr/local/go/bin" NITRO_BUILD_IGNORE_TIMESTAMPS=1 make build-wasm-bin -FROM rust:1.75-slim-bookworm as prover-header-builder +FROM rust:1.75-slim-bookworm AS prover-header-builder WORKDIR /workspace RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ @@ -113,10 +113,10 @@ COPY brotli brotli RUN apt-get update && apt-get install -y cmake RUN NITRO_BUILD_IGNORE_TIMESTAMPS=1 make build-prover-header -FROM scratch as prover-header-export +FROM scratch AS prover-header-export COPY --from=prover-header-builder /workspace/target/ / -FROM rust:1.75-slim-bookworm as prover-builder +FROM rust:1.75-slim-bookworm AS prover-builder WORKDIR /workspace RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ @@ -156,10 +156,10 @@ RUN NITRO_BUILD_IGNORE_TIMESTAMPS=1 make build-prover-lib RUN NITRO_BUILD_IGNORE_TIMESTAMPS=1 make build-prover-bin RUN NITRO_BUILD_IGNORE_TIMESTAMPS=1 make build-jit -FROM scratch as prover-export +FROM scratch AS prover-export COPY --from=prover-builder /workspace/target/ / -FROM debian:bookworm-slim as module-root-calc +FROM debian:bookworm-slim AS module-root-calc WORKDIR /workspace RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ @@ -181,7 +181,7 @@ COPY ./solgen ./solgen COPY ./contracts ./contracts RUN NITRO_BUILD_IGNORE_TIMESTAMPS=1 make build-replay-env -FROM debian:bookworm-slim as machine-versions +FROM debian:bookworm-slim AS machine-versions RUN apt-get update && apt-get install -y unzip wget curl WORKDIR /workspace/machines # Download WAVM machines @@ -206,7 +206,7 @@ COPY ./scripts/download-machine.sh . 
#RUN ./download-machine.sh consensus-v20 0x8b104a2e80ac6165dc58b9048de12f301d70b02a0ab51396c22b4b4b802a16a4 RUN ./download-machine.sh consensus-v30 0xb0de9cb89e4d944ae6023a3b62276e54804c242fd8c4c2d8e6cc4450f5fa8b1b && true -FROM golang:1.21.10-bookworm as node-builder +FROM golang:1.21.10-bookworm AS node-builder WORKDIR /workspace ARG version="" ARG datetime="" @@ -233,17 +233,17 @@ RUN mkdir -p target/bin COPY .nitro-tag.txt /nitro-tag.txt RUN NITRO_BUILD_IGNORE_TIMESTAMPS=1 make build -FROM node-builder as fuzz-builder +FROM node-builder AS fuzz-builder RUN mkdir fuzzers/ RUN ./scripts/fuzz.bash --build --binary-path /workspace/fuzzers/ -FROM debian:bookworm-slim as nitro-fuzzer +FROM debian:bookworm-slim AS nitro-fuzzer COPY --from=fuzz-builder /workspace/fuzzers/*.fuzz /usr/local/bin/ COPY ./scripts/fuzz.bash /usr/local/bin RUN mkdir /fuzzcache ENTRYPOINT [ "/usr/local/bin/fuzz.bash", "FuzzStateTransition", "--binary-path", "/usr/local/bin/", "--fuzzcache-path", "/fuzzcache" ] -FROM debian:bookworm-slim as nitro-node-slim +FROM debian:bookworm-slim AS nitro-node-slim WORKDIR /home/user COPY --from=node-builder /workspace/target/bin/nitro /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/relay /usr/local/bin/ @@ -271,9 +271,9 @@ USER user WORKDIR /home/user/ ENTRYPOINT [ "/usr/local/bin/nitro" ] -FROM offchainlabs/nitro-node:v2.3.4-rc.5-b4cc111 as nitro-legacy +FROM offchainlabs/nitro-node:v2.3.4-rc.5-b4cc111 AS nitro-legacy -FROM nitro-node-slim as nitro-node +FROM nitro-node-slim AS nitro-node USER root COPY --from=prover-export /bin/jit /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/daserver /usr/local/bin/ @@ -293,7 +293,7 @@ ENTRYPOINT [ "/usr/local/bin/nitro" , "--validation.wasm.allowed-wasm-module-roo USER user -FROM nitro-node as nitro-node-validator +FROM nitro-node AS nitro-node-validator USER root COPY --from=nitro-legacy /usr/local/bin/nitro-val /home/user/nitro-legacy/bin/nitro-val COPY --from=nitro-legacy /usr/local/bin/jit /home/user/nitro-legacy/bin/jit @@ -305,7 +305,7 @@ COPY scripts/split-val-entry.sh /usr/local/bin ENTRYPOINT [ "/usr/local/bin/split-val-entry.sh" ] USER user -FROM nitro-node-validator as nitro-node-dev +FROM nitro-node-validator AS nitro-node-dev USER root # Copy in latest WASM module root RUN rm -f /home/user/target/machines/latest @@ -329,5 +329,5 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ USER user -FROM nitro-node as nitro-node-default +FROM nitro-node AS nitro-node-default # Just to ensure nitro-node-dist is default From dd77b2284f399ff266ab4855d48386d5969d06a6 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Tue, 9 Jul 2024 19:29:30 -0600 Subject: [PATCH 56/72] Try to fix docker artifact name in CI --- .github/workflows/docker.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 30ad88d91a..2aacf32f00 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -64,12 +64,12 @@ jobs: run: | cd nitro-testnode ./test-node.bash --init --dev & - + - name: Wait for rpc to come up shell: bash run: | ${{ github.workspace }}/.github/workflows/waitForNitro.sh - + - name: Print WAVM module root id: module-root run: | @@ -77,7 +77,7 @@ jobs: # We work around this by piping a tarball through stdout docker run --rm --entrypoint tar localhost:5000/nitro-node-dev:latest -cf - target/machines/latest | tar xf - module_root="$(cat "target/machines/latest/module-root.txt")" - echo "name=module-root=$module_root" >> 
$GITHUB_STATE + echo "module-root=$module_root" >> "$GITHUB_OUTPUT" echo -e "\x1b[1;34mWAVM module root:\x1b[0m $module_root" - name: Upload WAVM machine as artifact From e048bf2bc3e27a95c02ddfdc3c25f9ccda281457 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Tue, 9 Jul 2024 21:29:54 -0600 Subject: [PATCH 57/72] Add workflow_dispatch to gh actions merge-checks.yml --- .github/workflows/merge-checks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/merge-checks.yml b/.github/workflows/merge-checks.yml index 6f291bbb22..be79520da4 100644 --- a/.github/workflows/merge-checks.yml +++ b/.github/workflows/merge-checks.yml @@ -1,6 +1,7 @@ name: Merge Checks on: + workflow_dispatch: pull_request: branches: [ master ] types: [synchronize, opened, reopened, labeled, unlabeled] From b9d7f87c325c1f3c110142010676cca05878a462 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Tue, 9 Jul 2024 23:59:56 -0600 Subject: [PATCH 58/72] Only run "design approved" check when necessary --- .github/workflows/merge-checks.yml | 40 ++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/.github/workflows/merge-checks.yml b/.github/workflows/merge-checks.yml index be79520da4..c50b28ada1 100644 --- a/.github/workflows/merge-checks.yml +++ b/.github/workflows/merge-checks.yml @@ -1,21 +1,41 @@ name: Merge Checks on: - workflow_dispatch: pull_request: branches: [ master ] types: [synchronize, opened, reopened, labeled, unlabeled] +permissions: + statuses: write + jobs: - design-approved-check: - if: ${{ !contains(github.event.*.labels.*.name, 'design-approved') }} - name: Design Approved Check + check-design-approved: + name: Check if Design Approved runs-on: ubuntu-latest steps: - - name: Check for design-approved label + - name: Check if design approved and update status run: | - echo "Pull request is missing the 'design-approved' label" - echo "This workflow fails so that the pull request cannot be merged" - exit 1 - - + set -x pipefail + status_state="pending" + if ${{ contains(github.event.*.labels.*.name, 'design-approved') }}; then + status_state="success" + else + resp="$(curl -sSL --fail-with-body \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "https://api.github.com/repos/$GITHUB_REPOSITORY/commits/${{ github.event.pull_request.head.sha }}/statuses")" + if ! 
jq -e '.[] | select(.context == "Design Approved Check")' > /dev/null <<< "$resp"; then + # Design not approved yet and no status exists + # Keep it without a status to keep the green checkmark appearing + # Merging will still be blocked until the required status appears + exit 0 + fi + fi + curl -sSL --fail-with-body \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/${{ github.event.pull_request.head.sha }}" \ + -d '{"context":"Design Approved Check","state":"'"$status_state"'"}' From 6e416f405125786ee088fc3b25a5cae0576249c1 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Wed, 10 Jul 2024 00:01:31 -0600 Subject: [PATCH 59/72] Clarify comment --- .github/workflows/merge-checks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/merge-checks.yml b/.github/workflows/merge-checks.yml index c50b28ada1..b729df2b26 100644 --- a/.github/workflows/merge-checks.yml +++ b/.github/workflows/merge-checks.yml @@ -28,6 +28,7 @@ jobs: if ! jq -e '.[] | select(.context == "Design Approved Check")' > /dev/null <<< "$resp"; then # Design not approved yet and no status exists # Keep it without a status to keep the green checkmark appearing + # Otherwise, the commit and PR's CI will appear to be indefinitely pending # Merging will still be blocked until the required status appears exit 0 fi From a464232191be272f487ec248fcd8d994a2131083 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 10 Jul 2024 13:25:26 +0200 Subject: [PATCH 60/72] Add exceptions for some arbitrator submodules --- .github/workflows/submodule-pin-check.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/submodule-pin-check.sh b/.github/workflows/submodule-pin-check.sh index a246e2e3e9..aecb287ce1 100755 --- a/.github/workflows/submodule-pin-check.sh +++ b/.github/workflows/submodule-pin-check.sh @@ -3,6 +3,10 @@ declare -Ar exceptions=( [contracts]=origin/develop [nitro-testnode]=origin/master + + #TODO Rachel to check these are the intended branches. 
+ [arbitrator/langs/c]=origin/vm-storage-cache + [arbitrator/tools/wasmer]=origin/adopt-v4.2.8 ) divergent=0 From 12affc858deec7d2132e4aea9e387a45b7d69a17 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 10 Jul 2024 13:38:06 +0200 Subject: [PATCH 61/72] Always make expiry index --- das/local_file_storage_service.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/das/local_file_storage_service.go b/das/local_file_storage_service.go index 6b0a5f0070..621cf3efdb 100644 --- a/das/local_file_storage_service.go +++ b/das/local_file_storage_service.go @@ -199,7 +199,7 @@ func (s *LocalFileStorageService) Put(ctx context.Context, data []byte, expiry u } } - if !s.enableLegacyLayout && s.layout.expiryEnabled { + if !s.enableLegacyLayout { if err := createHardLink(batchPath, s.layout.expiryPath(key, expiry)); err != nil { return fmt.Errorf("couldn't create by-expiry-path index entry: %w", err) } @@ -360,11 +360,9 @@ func migrate(fl *flatLayout, tl *trieLayout) error { return err } - if tl.expiryEnabled { - expiryPath := tl.expiryPath(batch.key, uint64(batch.expiry.Unix())) - if err = createHardLink(newPath, expiryPath); err != nil { - return err - } + expiryPath := tl.expiryPath(batch.key, uint64(batch.expiry.Unix())) + if err = createHardLink(newPath, expiryPath); err != nil { + return err } migrated++ } @@ -698,10 +696,8 @@ func (l *trieLayout) startMigration() error { return err } - if l.expiryEnabled { - if err := os.MkdirAll(filepath.Join(l.root, byExpiryTimestamp+migratingSuffix), 0o700); err != nil { - return err - } + if err := os.MkdirAll(filepath.Join(l.root, byExpiryTimestamp+migratingSuffix), 0o700); err != nil { + return err } return nil @@ -726,10 +722,8 @@ func (l *trieLayout) commitMigration() error { return err } - if l.expiryEnabled { - if err := removeSuffix(byExpiryTimestamp); err != nil { - return err - } + if err := removeSuffix(byExpiryTimestamp); err != nil { + return err } syscall.Sync() From 55e7d456981a65757911c6a22e22bb48198e3601 Mon Sep 17 00:00:00 2001 From: Tristan Wilson Date: Wed, 10 Jul 2024 17:20:44 +0200 Subject: [PATCH 62/72] Fix check for iter by expiry when expiry disabled --- das/local_file_storage_service_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/das/local_file_storage_service_test.go b/das/local_file_storage_service_test.go index 0b2ba9749d..cc27e293e3 100644 --- a/das/local_file_storage_service_test.go +++ b/das/local_file_storage_service_test.go @@ -98,10 +98,9 @@ func TestMigrationNoExpiry(t *testing.T) { countEntries(t, &s.layout, 4) getByHashAndCheck(t, s, "a", "b", "c", "d") - _, err = s.layout.iterateBatchesByTimestamp(time.Unix(int64(now+10), 0)) - if err == nil { - Fail(t, "can't iterate by timestamp when expiry is disabled") - } + // Can still iterate by timestamp even if expiry disabled + countTimestampEntries(t, &s.layout, time.Unix(int64(now+11), 0), 4) + } func TestMigrationExpiry(t *testing.T) { From fdf8beab5f5d2858c230d19a687155c710d8c75d Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Wed, 10 Jul 2024 14:25:48 -0600 Subject: [PATCH 63/72] Don't close the sigint channel --- cmd/nitro/nitro.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 572e6d2f06..04bdeb3228 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -679,8 +679,6 @@ func mainImpl() int { exitCode = 1 } if nodeConfig.Init.ThenQuit { - close(sigint) - return exitCode } } @@ -694,9 +692,6 @@ func mainImpl() int { log.Info("shutting 
down because of sigint") } - // cause future ctrl+c's to panic - close(sigint) - return exitCode } From 5e3bb7b2e762afbd821b1565a07ca2559ceb8680 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Wed, 10 Jul 2024 15:27:34 -0600 Subject: [PATCH 64/72] Fix validator pending validations metric --- staker/block_validator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/staker/block_validator.go b/staker/block_validator.go index 0fea05469f..94ee907da5 100644 --- a/staker/block_validator.go +++ b/staker/block_validator.go @@ -815,7 +815,6 @@ validationsLoop: v.possiblyFatal(errors.New("failed to set SendingValidation status")) } validatorPendingValidationsGauge.Inc(1) - defer validatorPendingValidationsGauge.Dec(1) var runs []validator.ValidationRun for _, moduleRoot := range wasmRoots { run := v.chosenValidator[moduleRoot].Launch(input, moduleRoot) @@ -826,6 +825,7 @@ validationsLoop: validationStatus.Runs = runs validationStatus.Cancel = cancel v.LaunchUntrackedThread(func() { + defer validatorPendingValidationsGauge.Dec(1) defer cancel() replaced = validationStatus.replaceStatus(SendingValidation, ValidationSent) if !replaced { From 3e12fd6faaabf76c5e6c6aec01f396742d6f4a55 Mon Sep 17 00:00:00 2001 From: Gabriel de Quadros Ligneul Date: Wed, 10 Jul 2024 18:26:42 -0300 Subject: [PATCH 65/72] init: fix loading db with custom ancient path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a previous commit, we added a check to verify whether the database directory was empty before initializing it. This check highlighted a bug where an existing database wasn’t being loaded properly when the persistent.ancient flag was set. Before the empty check, the code worked because the database was reinitialized instead of just being loaded. 
--- cmd/nitro/init.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index 97678a7d23..ea48ec8784 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -408,7 +408,7 @@ func isLeveldbNotExistError(err error) bool { func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeConfig, chainId *big.Int, cacheConfig *core.CacheConfig, persistentConfig *conf.PersistentConfig, l1Client arbutil.L1Interface, rollupAddrs chaininfo.RollupAddresses) (ethdb.Database, *core.BlockChain, error) { if !config.Init.Force { - if readOnlyDb, err := stack.OpenDatabaseWithFreezerWithExtraOptions("l2chaindata", 0, 0, "", "l2chaindata/", true, persistentConfig.Pebble.ExtraOptions("l2chaindata")); err == nil { + if readOnlyDb, err := stack.OpenDatabaseWithFreezerWithExtraOptions("l2chaindata", 0, 0, config.Persistent.Ancient, "l2chaindata/", true, persistentConfig.Pebble.ExtraOptions("l2chaindata")); err == nil { if chainConfig := gethexec.TryReadStoredChainConfig(readOnlyDb); chainConfig != nil { readOnlyDb.Close() if !arbmath.BigEquals(chainConfig.ChainID, chainId) { From 725baeffbe1376bde833d082df82fbffe2e27245 Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Thu, 11 Jul 2024 10:21:24 +0800 Subject: [PATCH 66/72] refactor: rename and set the default value to 0 for log query batch size --- staker/l1_validator.go | 2 +- staker/rollup_watcher.go | 4 ++-- staker/staker.go | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/staker/l1_validator.go b/staker/l1_validator.go index 14c46b5ddb..dd9673ee0b 100644 --- a/staker/l1_validator.go +++ b/staker/l1_validator.go @@ -381,7 +381,7 @@ func (v *L1Validator) generateNodeAction( return nil, false, nil } - successorNodes, err := v.rollup.LookupNodeChildren(ctx, stakerInfo.LatestStakedNode, stakerConfig.LogQueryRange, stakerInfo.LatestStakedNodeHash) + successorNodes, err := v.rollup.LookupNodeChildren(ctx, stakerInfo.LatestStakedNode, stakerConfig.LogQueryBatchSize, stakerInfo.LatestStakedNodeHash) if err != nil { return nil, false, fmt.Errorf("error looking up node %v (hash %v) children: %w", stakerInfo.LatestStakedNode, stakerInfo.LatestStakedNodeHash, err) } diff --git a/staker/rollup_watcher.go b/staker/rollup_watcher.go index 4ff8db0ce3..2c6841eb49 100644 --- a/staker/rollup_watcher.go +++ b/staker/rollup_watcher.go @@ -166,7 +166,7 @@ func (r *RollupWatcher) LookupNode(ctx context.Context, number uint64) (*NodeInf }, nil } -func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, logQueryRange uint64, nodeHash common.Hash) ([]*NodeInfo, error) { +func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, logQueryRangeSize uint64, nodeHash common.Hash) ([]*NodeInfo, error) { node, err := r.RollupUserLogic.GetNode(r.getCallOpts(ctx), nodeNum) if err != nil { return nil, err @@ -193,7 +193,7 @@ func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, // break down the query to avoid eth_getLogs query limit for toBlock.Cmp(fromBlock) > 0 { query.FromBlock = fromBlock - query.ToBlock = new(big.Int).Add(fromBlock, big.NewInt(int64(logQueryRange))) + query.ToBlock = new(big.Int).Add(fromBlock, big.NewInt(int64(logQueryRangeSize))) if query.ToBlock.Cmp(toBlock) > 0 { query.ToBlock = toBlock } diff --git a/staker/staker.go b/staker/staker.go index f7f412ca16..6d3b246230 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -90,7 +90,7 @@ type L1ValidatorConfig struct { ExtraGas uint64 `koanf:"extra-gas" 
reload:"hot"` Dangerous DangerousConfig `koanf:"dangerous"` ParentChainWallet genericconf.WalletConfig `koanf:"parent-chain-wallet"` - LogQueryRange uint64 `koanf:"log-query-range" reload:"hot"` + LogQueryBatchSize uint64 `koanf:"log-query-batch-size" reload:"hot"` strategy StakerStrategy gasRefunder common.Address @@ -157,7 +157,7 @@ var DefaultL1ValidatorConfig = L1ValidatorConfig{ ExtraGas: 50000, Dangerous: DefaultDangerousConfig, ParentChainWallet: DefaultValidatorL1WalletConfig, - LogQueryRange: 999, + LogQueryBatchSize: 0, } var TestL1ValidatorConfig = L1ValidatorConfig{ @@ -178,7 +178,7 @@ var TestL1ValidatorConfig = L1ValidatorConfig{ ExtraGas: 50000, Dangerous: DefaultDangerousConfig, ParentChainWallet: DefaultValidatorL1WalletConfig, - LogQueryRange: 999, + LogQueryBatchSize: 999, } var DefaultValidatorL1WalletConfig = genericconf.WalletConfig{ @@ -204,7 +204,7 @@ func L1ValidatorConfigAddOptions(prefix string, f *flag.FlagSet) { f.String(prefix+".gas-refunder-address", DefaultL1ValidatorConfig.GasRefunderAddress, "The gas refunder contract address (optional)") f.String(prefix+".redis-url", DefaultL1ValidatorConfig.RedisUrl, "redis url for L1 validator") f.Uint64(prefix+".extra-gas", DefaultL1ValidatorConfig.ExtraGas, "use this much more gas than estimation says is necessary to post transactions") - f.Uint64(prefix+".log-query-range", DefaultL1ValidatorConfig.LogQueryRange, "range ro query from eth_getLogs") + f.Uint64(prefix+".log-query-range", DefaultL1ValidatorConfig.LogQueryBatchSize, "range ro query from eth_getLogs") dataposter.DataPosterConfigAddOptions(prefix+".data-poster", f, dataposter.DefaultDataPosterConfigForValidator) DangerousConfigAddOptions(prefix+".dangerous", f) genericconf.WalletConfigAddOptions(prefix+".parent-chain-wallet", f, DefaultL1ValidatorConfig.ParentChainWallet.Pathname) From 0433d001bcee54de25fedcba0a51eb8bfb4963e5 Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Thu, 11 Jul 2024 10:23:54 +0800 Subject: [PATCH 67/72] chore: rename command name --- staker/staker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/staker/staker.go b/staker/staker.go index 6d3b246230..a924660b7e 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -204,7 +204,7 @@ func L1ValidatorConfigAddOptions(prefix string, f *flag.FlagSet) { f.String(prefix+".gas-refunder-address", DefaultL1ValidatorConfig.GasRefunderAddress, "The gas refunder contract address (optional)") f.String(prefix+".redis-url", DefaultL1ValidatorConfig.RedisUrl, "redis url for L1 validator") f.Uint64(prefix+".extra-gas", DefaultL1ValidatorConfig.ExtraGas, "use this much more gas than estimation says is necessary to post transactions") - f.Uint64(prefix+".log-query-range", DefaultL1ValidatorConfig.LogQueryBatchSize, "range ro query from eth_getLogs") + f.Uint64(prefix+".log-query-range-size", DefaultL1ValidatorConfig.LogQueryBatchSize, "range ro query from eth_getLogs") dataposter.DataPosterConfigAddOptions(prefix+".data-poster", f, dataposter.DefaultDataPosterConfigForValidator) DangerousConfigAddOptions(prefix+".dangerous", f) genericconf.WalletConfigAddOptions(prefix+".parent-chain-wallet", f, DefaultL1ValidatorConfig.ParentChainWallet.Pathname) From e89e283295a6bcaad610fe1ee11b4fded1160627 Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Thu, 11 Jul 2024 10:30:57 +0800 Subject: [PATCH 68/72] chore: set the default value to 0 for TestL1ValidatorConfig --- staker/staker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/staker/staker.go b/staker/staker.go index 
a924660b7e..5bc96f5192 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -178,7 +178,7 @@ var TestL1ValidatorConfig = L1ValidatorConfig{ ExtraGas: 50000, Dangerous: DefaultDangerousConfig, ParentChainWallet: DefaultValidatorL1WalletConfig, - LogQueryBatchSize: 999, + LogQueryBatchSize: 0, } var DefaultValidatorL1WalletConfig = genericconf.WalletConfig{ From d22da37b44e6fb29bce333a08885497994a1762f Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Thu, 11 Jul 2024 12:01:17 +0800 Subject: [PATCH 69/72] fix: query all requested logs while logQueryRangeSize is 0 --- staker/rollup_watcher.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/staker/rollup_watcher.go b/staker/rollup_watcher.go index 2c6841eb49..b35bebd1c6 100644 --- a/staker/rollup_watcher.go +++ b/staker/rollup_watcher.go @@ -193,7 +193,11 @@ func (r *RollupWatcher) LookupNodeChildren(ctx context.Context, nodeNum uint64, // break down the query to avoid eth_getLogs query limit for toBlock.Cmp(fromBlock) > 0 { query.FromBlock = fromBlock - query.ToBlock = new(big.Int).Add(fromBlock, big.NewInt(int64(logQueryRangeSize))) + if logQueryRangeSize == 0 { + query.ToBlock = toBlock + } else { + query.ToBlock = new(big.Int).Add(fromBlock, big.NewInt(int64(logQueryRangeSize))) + } if query.ToBlock.Cmp(toBlock) > 0 { query.ToBlock = toBlock } From 4781fdfa0c550c51a944ce7090330e749c2d022b Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Thu, 11 Jul 2024 12:11:51 +0800 Subject: [PATCH 70/72] fix: subcommand name for LogQueryBatchSize --- staker/staker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/staker/staker.go b/staker/staker.go index 5bc96f5192..24f5dc61e3 100644 --- a/staker/staker.go +++ b/staker/staker.go @@ -204,7 +204,7 @@ func L1ValidatorConfigAddOptions(prefix string, f *flag.FlagSet) { f.String(prefix+".gas-refunder-address", DefaultL1ValidatorConfig.GasRefunderAddress, "The gas refunder contract address (optional)") f.String(prefix+".redis-url", DefaultL1ValidatorConfig.RedisUrl, "redis url for L1 validator") f.Uint64(prefix+".extra-gas", DefaultL1ValidatorConfig.ExtraGas, "use this much more gas than estimation says is necessary to post transactions") - f.Uint64(prefix+".log-query-range-size", DefaultL1ValidatorConfig.LogQueryBatchSize, "range ro query from eth_getLogs") + f.Uint64(prefix+".log-query-batch-size", DefaultL1ValidatorConfig.LogQueryBatchSize, "range ro query from eth_getLogs") dataposter.DataPosterConfigAddOptions(prefix+".data-poster", f, dataposter.DefaultDataPosterConfigForValidator) DangerousConfigAddOptions(prefix+".dangerous", f) genericconf.WalletConfigAddOptions(prefix+".parent-chain-wallet", f, DefaultL1ValidatorConfig.ParentChainWallet.Pathname) From 8f2dc2ec62cd763cf5fd5b0919c9f02c365ddfaf Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Wed, 10 Jul 2024 22:50:47 -0600 Subject: [PATCH 71/72] Switch merge checks action to pull_request_target event --- .github/workflows/merge-checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/merge-checks.yml b/.github/workflows/merge-checks.yml index b729df2b26..6561c429e2 100644 --- a/.github/workflows/merge-checks.yml +++ b/.github/workflows/merge-checks.yml @@ -1,7 +1,7 @@ name: Merge Checks on: - pull_request: + pull_request_target: branches: [ master ] types: [synchronize, opened, reopened, labeled, unlabeled] From cbdf31fe9d5e071272a5e4bbdfc05af6cef1c711 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Fri, 12 Jul 2024 15:28:07 -0600 Subject: [PATCH 72/72] Fix disabling P2P --- 
cmd/nitro-val/nitro_val.go | 3 +++ cmd/nitro/nitro.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cmd/nitro-val/nitro_val.go b/cmd/nitro-val/nitro_val.go index 6f5f546430..1a7d2e6283 100644 --- a/cmd/nitro-val/nitro_val.go +++ b/cmd/nitro-val/nitro_val.go @@ -70,6 +70,9 @@ func mainImpl() int { nodeConfig.WS.Apply(&stackConf) nodeConfig.Auth.Apply(&stackConf) nodeConfig.IPC.Apply(&stackConf) + stackConf.P2P.ListenAddr = "" + stackConf.P2P.NoDial = true + stackConf.P2P.NoDiscovery = true vcsRevision, strippedRevision, vcsTime := confighelpers.GetVersion() stackConf.Version = strippedRevision diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 04bdeb3228..a4ef1b2945 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -183,6 +183,9 @@ func mainImpl() int { if nodeConfig.WS.ExposeAll { stackConf.WSModules = append(stackConf.WSModules, "personal") } + stackConf.P2P.ListenAddr = "" + stackConf.P2P.NoDial = true + stackConf.P2P.NoDiscovery = true vcsRevision, strippedRevision, vcsTime := confighelpers.GetVersion() stackConf.Version = strippedRevision