feat: [log retention improvements pt. 1] introduce file layout interface (#2534)

## Description
Log Retention Improvements:

Users have had issues with logs from long-running enclaves taking up
large amounts of storage. We have a retention mechanism that automatically
rotates logs after some time, but currently it (a) can only rotate
logs weekly and (b) cannot be configured.

These improvements will allow retention to be as granular as hourly and
will allow users to configure the retention period (e.g. `1hr`, `2hr`,
`1day`, `1week`). To do this, a few changes need to happen. Most notably,
the way logs are stored and retrieved needs to change to support
rotating log files hourly. Implementing this requires changes across a
few components (`LogsAggregator`, `LogsDatabaseClient`, `LogFileManager`,
the CLI), so I'll be making them incrementally (see the retention-period
sketch after the PR list below).

- [x] PR 1: Introduce `LogFileLayout` and `PerWeekFileLayout` and
adjust `LogFileManager` and `LogsDatabaseClient` to use this for
retrieving log file paths
- [ ] PR 2: Make retention configurable via a CLI flag
- [ ] PR 3: Implement `PerHourFileLayout` and swap storage format from
`PerWeekFileLayout` to `PerHourFileLayout`
- [ ] PR 4: Make `LogsAggregator` use `LogFileLayout` for determining
storage format and set it to use `PerHourFileLayout`
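As a rough illustration (not part of this PR, and the mapping below is hypothetical), the user-facing retention strings would ultimately resolve to the `time.Duration` that `LogFileLayout.GetLogFilePaths` accepts:

```go
// Hypothetical mapping from user-facing retention strings to the
// time.Duration consumed by LogFileLayout.GetLogFilePaths; the real
// CLI flag and parsing arrive in PR 2.
var retentionPeriods = map[string]time.Duration{
	"1hr":   1 * time.Hour,
	"2hr":   2 * time.Hour,
	"1day":  24 * time.Hour,
	"1week": 7 * 24 * time.Hour,
}
```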
------------------------------
This first PR sets the stage for this change by introducing a new
interface called `LogFileLayout` and migrating the existing storage
format to use it. Right now, knowledge about the log storage format is
spread across several entities (`LogsAggregator` - to store,
`LogFileManager` - to rotate logs, `StreamLogsStrategy` - to read logs).
This interface consolidates knowledge of how logs are stored into one
module that can be used by any entity doing operations that require
retrieving log files from the filesystem. This will make the transition
to a different storage format safer (it only requires implementing and
testing the `PerHourFileLayout` module) and makes it easier to swap out the
storage format in the future (e.g. if even more granular retention is
needed) without breaking other entities.
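
To make that concrete, here is a minimal sketch of a consumer going through the interface; only the `LogFileLayout` methods come from this PR, while `streamServiceLogs`, its wiring, and the `file_layout` import path are illustrative assumptions:

```go
package example

import (
	"time"

	// import paths inferred from the new package locations in this PR
	"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/file_layout"
	"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/volume_filesystem"
)

// streamServiceLogs (hypothetical) resolves log file paths via LogFileLayout
// rather than embedding knowledge of the on-disk layout.
func streamServiceLogs(
	fs volume_filesystem.VolumeFilesystem,
	layout file_layout.LogFileLayout,
	retentionPeriod time.Duration,
	enclaveUuid, serviceUuid string,
) error {
	// -1 intervals => every existing file from now back through the retention period
	paths, err := layout.GetLogFilePaths(fs, retentionPeriod, -1, enclaveUuid, serviceUuid)
	if err != nil {
		return err
	}
	for _, path := range paths {
		// open [path] on [fs] and stream its log lines, oldest file first
		_ = path
	}
	return nil
}
```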

## Is this change user facing?
NO

## References 
#2443
#2190
tedim52 authored Aug 16, 2024
1 parent 926de23 commit a494278
Showing 17 changed files with 716 additions and 139 deletions.
@@ -156,6 +156,20 @@ func (client *kurtosisBackendLogsDatabaseClient) FilterExistingServiceUuids(
return filteredServiceUuidsSet, nil
}

func (client *kurtosisBackendLogsDatabaseClient) StartLogFileManagement(ctx context.Context) {
// no log file management needs to be done for this logs db client
}

func (client *kurtosisBackendLogsDatabaseClient) RemoveEnclaveLogs(enclaveUuid string) error {
// no log file management needs to be done for this logs db client
return nil
}

func (client *kurtosisBackendLogsDatabaseClient) RemoveAllLogs() error {
// no log file management needs to be done for this logs db client
return nil
}

// ====================================================================================================
//
// Private helper functions
@@ -0,0 +1,26 @@
package file_layout

import (
"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/volume_filesystem"
"time"
)

type LogFileLayout interface {
// GetLogFileLayoutFormat returns a string representation of the "format" that files are laid out in
// Formats are composed of:
// - "/" - representing a nested directory
// - "<enclaveUuid>" - representing where an enclave uuid is inserted
// - "<serviceUuid>" - representing where a service uuid is inserted
// - time formats specified by strftime https://cplusplus.com/reference/ctime/strftime/
// - any other ASCII text
GetLogFileLayoutFormat() string

// GetLogFilePath gets the log file path for [serviceUuid] in [enclaveUuid] at [time]
GetLogFilePath(time time.Time, enclaveUuid, serviceUuid string) string

// GetLogFilePaths retrieves a list of filepaths from [filesystem] for [serviceUuid] in [enclaveUuid]
// If [retentionPeriodIntervals] is set to -1, retrieves all existing filepaths from the current time back through [retentionPeriod], sorted from oldest to most recent
// If [retentionPeriodIntervals] is positive, retrieves the filepaths for the [retentionPeriodIntervals] rotation intervals immediately beyond [retentionPeriod]
GetLogFilePaths(filesystem volume_filesystem.VolumeFilesystem, retentionPeriod time.Duration, retentionPeriodIntervals int, enclaveUuid, serviceUuid string) ([]string, error)
}
@@ -0,0 +1,124 @@
package file_layout

import (
"fmt"
"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/logs_clock"
"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/volume_consts"
"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/volume_filesystem"
"golang.org/x/exp/slices"
"math"
"os"
"strconv"
"time"
)

const (
oneWeekInHours = 7 * 24
oneWeekDuration = oneWeekInHours * time.Hour

// basepath /year/week
PerWeekDirPathStr = "%s%s/%s/"

// ... enclave uuid/service uuid <filetype>
PerWeekFilePathFmtStr = PerWeekDirPathStr + "%s/%s%s"
)

type PerWeekFileLayout struct {
time logs_clock.LogsClock
}

func NewPerWeekFileLayout(time logs_clock.LogsClock) *PerWeekFileLayout {
return &PerWeekFileLayout{time: time}
}

func (pwf *PerWeekFileLayout) GetLogFileLayoutFormat() string {
// Right now this format is specifically tailored to the Vector logs aggregator's format
// This will be used by the Vector LogsAggregator to determine the path to output to
return "/var/log/kurtosis/%%Y/%%V/{{ enclave_uuid }}/{{ service_uuid }}.json"
}

func (pwf *PerWeekFileLayout) GetLogFilePath(time time.Time, enclaveUuid, serviceUuid string) string {
year, week := time.ISOWeek()
return getLogFilePath(year, week, enclaveUuid, serviceUuid)
}

func (pwf *PerWeekFileLayout) GetLogFilePaths(
filesystem volume_filesystem.VolumeFilesystem,
retentionPeriod time.Duration,
retentionPeriodIntervals int,
enclaveUuid, serviceUuid string) ([]string, error) {
var paths []string
retentionPeriodInWeeks := DurationToWeeks(retentionPeriod)

if retentionPeriodIntervals < 0 {
return pwf.getLogFilePathsFromNowTillRetentionPeriod(filesystem, retentionPeriodInWeeks, enclaveUuid, serviceUuid)
} else {
paths = pwf.getLogFilePathsBeyondRetentionPeriod(filesystem, retentionPeriodInWeeks, retentionPeriodIntervals, enclaveUuid, serviceUuid)
}

return paths, nil
}

func (pwf *PerWeekFileLayout) getLogFilePathsFromNowTillRetentionPeriod(fs volume_filesystem.VolumeFilesystem, retentionPeriodInWeeks int, enclaveUuid, serviceUuid string) ([]string, error) {
var paths []string
currentTime := pwf.time.Now()

// scan for first existing log file
firstWeekWithLogs := 0
for i := 0; i < retentionPeriodInWeeks; i++ {
year, week := currentTime.Add(time.Duration(-i) * oneWeekDuration).ISOWeek()
filePathStr := getLogFilePath(year, week, enclaveUuid, serviceUuid)
if _, err := fs.Stat(filePathStr); err == nil {
paths = append(paths, filePathStr)
firstWeekWithLogs = i
break
} else {
// return if error is not due to nonexistent file path
if !os.IsNotExist(err) {
return paths, err
}
}
}

// scan for remaining files as far back as they exist before the retention period
for i := firstWeekWithLogs + 1; i < retentionPeriodInWeeks; i++ {
year, week := currentTime.Add(time.Duration(-i) * oneWeekDuration).ISOWeek()
filePathStr := getLogFilePath(year, week, enclaveUuid, serviceUuid)
if _, err := fs.Stat(filePathStr); err != nil {
break
}
paths = append(paths, filePathStr)
}

// reverse for oldest to most recent
slices.Reverse(paths)

return paths, nil
}

func (pwf *PerWeekFileLayout) getLogFilePathsBeyondRetentionPeriod(fs volume_filesystem.VolumeFilesystem, retentionPeriodInWeeks int, retentionPeriodIntervals int, enclaveUuid, serviceUuid string) []string {
var paths []string
currentTime := pwf.time.Now()

// scan for log files just beyond the retention period
for i := 0; i < retentionPeriodIntervals; i++ {
numWeeksToGoBack := retentionPeriodInWeeks + i
year, weekToRemove := currentTime.Add(time.Duration(-numWeeksToGoBack) * oneWeekDuration).ISOWeek()
filePathStr := getLogFilePath(year, weekToRemove, enclaveUuid, serviceUuid)
if _, err := fs.Stat(filePathStr); err != nil {
continue
}
paths = append(paths, filePathStr)
}

return paths
}

func DurationToWeeks(d time.Duration) int {
return int(math.Round(d.Hours() / float64(oneWeekInHours)))
}

func getLogFilePath(year, week int, enclaveUuid, serviceUuid string) string {
formattedWeekNum := fmt.Sprintf("%02d", week)
return fmt.Sprintf(PerWeekFilePathFmtStr, volume_consts.LogsStorageDirpath, strconv.Itoa(year), formattedWeekNum, enclaveUuid, serviceUuid, volume_consts.Filetype)
}
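
For intuition on the rounding in `DurationToWeeks` above (a worked example, not part of the diff): sub-week retention periods collapse to zero whole weeks under the per-week layout, which is exactly the granularity limitation the upcoming `PerHourFileLayout` addresses.

```go
file_layout.DurationToWeeks(2 * 24 * time.Hour)     // 48h / 168h  ≈ 0.29 -> 0 weeks
file_layout.DurationToWeeks(4 * 24 * time.Hour)     // 96h / 168h  ≈ 0.57 -> 1 week
file_layout.DurationToWeeks(3 * 7 * 24 * time.Hour) // 504h / 168h = 3.0  -> 3 weeks
```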