Skip to content

Commit

Permalink
add Badger key usage stats for /api/storage
Browse files Browse the repository at this point in the history
  • Loading branch information
DocSavage committed Mar 24, 2024
1 parent ea3b377 commit 5d6c0d8
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 55 deletions.
59 changes: 41 additions & 18 deletions datastore/datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -784,46 +784,69 @@ func GetStorageDetails() (map[string]StorageStats, error) {
return statsByStore, nil
}

// StorageSummary holds the storage statistics that GetStorageSummary reports
// for a single data instance within a store.
type StorageSummary struct {
// InstanceName is the data instance's name, or "unknown-<instance id>" when
// no data instance could be found for the instance ID.
InstanceName string
// DataType is the data instance's type name; left empty for unknown instances.
DataType string
// DataUUID is the data instance's data UUID; left empty for unknown instances.
DataUUID string
// RootUUID is the data instance's root UUID; left empty for unknown instances.
RootUUID string
// Bytes is the size reported for the instance by storage.GetDataSizes.
Bytes uint64
// KeyUsage is the histogram returned by storage.GetStoreKeyUsage: it maps a
// number of versions to the number of keys stored with that many versions.
KeyUsage map[int]int
}

// GetStorageSummary returns JSON for all the data instances in the stores.
func GetStorageSummary() (string, error) {
stores, err := storage.AllStores()
if err != nil {
return "", err
}

breakdown := make(map[string]map[uint32]interface{}, len(stores))
breakdown := make(map[string]map[dvid.InstanceID]StorageSummary, len(stores))
for alias, store := range stores {
s, err := storage.GetDataSizes(store, nil)
sizes, err := storage.GetDataSizes(store, nil)
if err != nil {
return "", err
}
keyUsage, err := storage.GetStoreKeyUsage(store)
if err != nil {
return "", err
}
if s == nil {
if sizes == nil && keyUsage == nil {
continue
}

// For each instance ID, populate the instance info if available.
sdata := make(map[uint32]interface{}, len(s))
for instanceID, size := range s {
idata := struct {
Name string
DataType string
DataUUID string
RootUUID string
Bytes uint64
}{
Bytes: size,
}
// For each instance ID, populate the storage data.
sdata := make(map[dvid.InstanceID]StorageSummary)
for instanceID, size := range sizes {
var idata StorageSummary
d, err := getDataByInstanceID(instanceID)
if err != nil {
// we have no data instance so use placeholders.
idata.Name = fmt.Sprintf("unknown-%d", instanceID)
idata.InstanceName = fmt.Sprintf("unknown-%d", instanceID)
} else {
idata.Name = string(d.DataName())
idata.InstanceName = string(d.DataName())
idata.DataType = string(d.TypeName())
idata.DataUUID = string(d.DataUUID())
idata.RootUUID = string(d.RootUUID())
}
sdata[uint32(instanceID)] = idata
idata.Bytes = size
sdata[instanceID] = idata
}
for instanceID, usage := range keyUsage {
idata, found := sdata[instanceID]
if !found {
d, err := getDataByInstanceID(instanceID)
if err != nil {
// we have no data instance so use placeholders.
idata.InstanceName = fmt.Sprintf("unknown-%d", instanceID)
} else {
idata.InstanceName = string(d.DataName())
idata.DataType = string(d.TypeName())
idata.DataUUID = string(d.DataUUID())
idata.RootUUID = string(d.RootUUID())
}
}
idata.KeyUsage = usage
sdata[instanceID] = idata
}
breakdown[string(alias)] = sdata
}
Expand Down
48 changes: 48 additions & 0 deletions storage/badger/badger.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,54 @@ func (db *BadgerDB) metadataExists() (bool, error) {
return found, nil
}

// ---- KeyUsageViewer interface ------

// GetKeyUsage scans each of the given key ranges and returns, in the same
// order as ranges, a histogram per range that maps a version count to the
// number of keys having that many stored versions.
//
// Only keys are read (values are not prefetched).  An error is returned if
// the receiver is nil or the underlying read transaction fails.
func (db *BadgerDB) GetKeyUsage(ranges []storage.KeyRange) (hitsPerInstance []map[int]int, err error) {
	if db == nil {
		return nil, fmt.Errorf("can't call GetKeyUsage on nil BadgerDB")
	}
	hitsPerInstance = make([]map[int]int, len(ranges))
	err = db.bdp.View(func(txn *badger.Txn) error {
		opts := badger.DefaultIteratorOptions
		opts.PrefetchValues = false
		it := txn.NewIterator(opts)
		defer it.Close()

		for i, kr := range ranges {
			// Allocate histogram for this key range (i.e., a data instance).
			hits := make(map[int]int)
			hitsPerInstance[i] = hits

			// maxVersionKey is the upper bound of all versions of the key
			// currently being counted; numVersions is how many versions of
			// that key have been seen so far (0 means no key started yet).
			maxVersionKey := storage.MaxVersionDataKeyFromKey(kr.Start)
			numVersions := 0
			for it.Seek(kr.Start); it.Valid(); it.Next() {
				key := storage.Key(it.Item().KeyCopy(nil))
				storage.StoreKeyBytesRead <- len(key)

				// OpenEnd is exclusive, so stop before counting it or
				// anything past it.
				if bytes.Compare(key, kr.OpenEnd) >= 0 {
					break
				}

				// Moving past the current key's upper bound means all of
				// its versions have been seen, so record its count.
				if numVersions > 0 && bytes.Compare(key, maxVersionKey) > 0 {
					hits[numVersions]++
					numVersions = 0
				}

				// Starting a new key: derive the version upper bound from
				// this key, not from the start of the range, so that later
				// versions of the same key are grouped with it.
				if numVersions == 0 && key.IsDataKey() {
					maxVersionKey = storage.MaxVersionDataKeyFromKey(key)
				}
				numVersions++
			}

			// Record the final key in the range, which has no successor to
			// trigger the flush above.
			if numVersions > 0 {
				hits[numVersions]++
			}
		}
		return nil
	})
	return hitsPerInstance, err
}

// ---- KeyValueGetter interface ------

// Get returns a value given a key.
Expand Down
14 changes: 14 additions & 0 deletions storage/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -657,3 +657,17 @@ func MaxVersionDataKey(d dvid.InstanceID, tk TKey) (Key, error) {
key = append(key, dvid.ClientID(dvid.MaxClientID).Bytes()...)
return append(key, 0xFF), nil
}

// MaxVersionDataKeyFromKey returns the upper-bound key for a full data key:
// a copy of the given key in which the trailing version ID and client ID are
// replaced by their maximum values and the final byte is set to 0xFF.
// The passed key is not modified.
func MaxVersionDataKeyFromKey(key Key) Key {
	upper := make([]byte, len(key))
	copy(upper, key)

	// The key ends with <version id><client id><1 final byte>.
	versionBeg := len(upper) - dvid.VersionIDSize - dvid.ClientIDSize - 1
	clientBeg := versionBeg + dvid.VersionIDSize
	clientEnd := clientBeg + dvid.ClientIDSize

	copy(upper[versionBeg:clientBeg], dvid.VersionID(dvid.MaxVersionID).Bytes())
	copy(upper[clientBeg:clientEnd], dvid.ClientID(dvid.MaxClientID).Bytes())
	upper[len(upper)-1] = 0xFF
	return Key(upper)
}
87 changes: 54 additions & 33 deletions storage/keyvalue.go
Original file line number Diff line number Diff line change
@@ -1,47 +1,47 @@
/*
Package storage provides a unified interface to a number of storage engines.
Since each storage engine has different capabilities, this package defines a
number of interfaces in addition to the core Engine interface, which all
storage engines should satisfy.
Package storage provides a unified interface to a number of storage engines.
Since each storage engine has different capabilities, this package defines a
number of interfaces in addition to the core Engine interface, which all
storage engines should satisfy.
Keys are specified as a combination of Context and a datatype-specific byte slice,
typically called a "type-specific key" (TKey) in DVID docs and code. The Context
provides DVID-wide namespacing and as such, must use one of the Context implementations
within the storage package. (This is enforced by making Context a Go opaque interface.)
The type-specific key formatting is entirely up to the datatype designer, although
use of dvid.Index is suggested.
Keys are specified as a combination of Context and a datatype-specific byte slice,
typically called a "type-specific key" (TKey) in DVID docs and code. The Context
provides DVID-wide namespacing and as such, must use one of the Context implementations
within the storage package. (This is enforced by making Context a Go opaque interface.)
The type-specific key formatting is entirely up to the datatype designer, although
use of dvid.Index is suggested.
Initially we are concentrating on key-value backends but expect to support
graph and perhaps relational databases, either using specialized databases
or software layers on top of an ordered key-value store.
Initially we are concentrating on key-value backends but expect to support
graph and perhaps relational databases, either using specialized databases
or software layers on top of an ordered key-value store.
Although we assume lexicographic ordering for range queries, there is some
variation in how variable size keys are treated. We assume all storage engines,
after appropriate DVID drivers, use the following definition of ordering:
Although we assume lexicographic ordering for range queries, there is some
variation in how variable size keys are treated. We assume all storage engines,
after appropriate DVID drivers, use the following definition of ordering:
A string s precedes a string t in lexicographic order if:
A string s precedes a string t in lexicographic order if:
* s is a prefix of t, or
* if c and d are respectively the first character of s and t in which s and t differ,
then c precedes d in character order.
* if s and t are equivalent for all of s, but t is longer
* s is a prefix of t, or
* if c and d are respectively the first character of s and t in which s and t differ,
then c precedes d in character order.
* if s and t are equivalent for all of s, but t is longer
Note: For the characters that are alphabetical letters, the character order coincides
with the alphabetical order. Digits precede letters, and uppercase letters precede
lowercase ones.
Note: For the characters that are alphabetical letters, the character order coincides
with the alphabetical order. Digits precede letters, and uppercase letters precede
lowercase ones.
Examples:
Examples:
composer precedes computer
house precedes household
Household precedes house
H2O precedes HOTEL
mydex precedes mydexterity
composer precedes computer
house precedes household
Household precedes house
H2O precedes HOTEL
mydex precedes mydexterity
Note that the above is different than shortlex order, which would group strings
based on length first.
Note that the above is different than shortlex order, which would group strings
based on length first.
The above lexicographical ordering is used by default for leveldb variants.
The above lexicographical ordering is used by default for leveldb variants.
*/
package storage

Expand Down Expand Up @@ -447,3 +447,24 @@ func getInstanceSizes(sv SizeViewer, instances []dvid.InstanceID) (map[dvid.Inst
}
return sizes, nil
}

// getKeyUsage asks the viewer for the key usage histogram of each given data
// instance and returns the histograms keyed by instance ID.  Each instance is
// queried over the key range that starts at its minimum data key and ends
// (exclusively) at the minimum data key of the next instance ID.
func getKeyUsage(vw KeyUsageViewer, instances []dvid.InstanceID) (map[dvid.InstanceID]map[int]int, error) {
	ranges := make([]KeyRange, 0, len(instances))
	for _, id := range instances {
		ranges = append(ranges, KeyRange{
			Start:   constructDataKey(id, 0, 0, minTKey),
			OpenEnd: constructDataKey(id+1, 0, 0, minTKey),
		})
	}

	histograms, err := vw.GetKeyUsage(ranges)
	if err != nil {
		return nil, err
	}
	// The viewer must answer with exactly one histogram per requested range.
	if got, want := len(histograms), len(instances); got != want {
		return nil, fmt.Errorf("only got back %d instance key usages, not the requested %d instances", got, want)
	}

	usageByInstance := make(map[dvid.InstanceID]map[int]int, len(instances))
	for i := range histograms {
		usageByInstance[instances[i]] = histograms[i]
	}
	return usageByInstance, nil
}
50 changes: 46 additions & 4 deletions storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,15 +159,57 @@ func NewStore(c dvid.StoreConfig) (db dvid.Store, created bool, err error) {
// Repair looks up the storage engine with the given name and asks it to
// repair the store at path.  An error is returned if no engine has that name
// or if the engine does not implement RepairableEngine; otherwise the result
// of the engine's own Repair call is returned.
func Repair(name, path string) error {
	e := GetEngine(name)
	if e == nil {
		return fmt.Errorf("could not find engine with name %q", name)
	}
	repairer, ok := e.(RepairableEngine)
	if !ok {
		return fmt.Errorf("engine %q has no capability to be repaired", name)
	}
	return repairer.Repair(path)
}

// KeyUsageViewer stores can return how many keys are stored and a histogram of the
// number of versions per key for each data instance given by the key ranges.
type KeyUsageViewer interface {
// GetKeyUsage returns one histogram per entry in ranges, in the same order.
// Each histogram maps a number of versions to the number of keys in that
// range stored with that many versions.
GetKeyUsage(ranges []KeyRange) (histPerInstance []map[int]int, err error)
}

// GetStoreKeyUsage returns a histogram of the number of versions per key for each
// data instance in the store, keyed by instance ID.
//
// It returns (nil, nil) when the store is not both an OrderedKeyValueGetter and
// a KeyUsageViewer (an info message is logged in that case), or when no data
// instances are found in the store.  Errors from scanning the store for
// instances or from computing the key usage are returned to the caller.
func GetStoreKeyUsage(store dvid.Store) (map[dvid.InstanceID]map[int]int, error) {
	// Log the store itself in the messages below: the type-asserted value is
	// nil whenever its assertion fails.
	db, ok := store.(OrderedKeyValueGetter)
	if !ok {
		dvid.Infof("Cannot get key usage for store %s, which is not an OrderedKeyValueGetter store\n", store)
		return nil, nil
	}
	viewer, ok := store.(KeyUsageViewer)
	if !ok {
		dvid.Infof("Cannot get key usage for store %s, which is not a KeyUsageViewer store\n", store)
		return nil, nil
	}

	// Scan store and get all instances.
	var ids []dvid.InstanceID
	var curID dvid.InstanceID
	for {
		nextID, done, err := getNextInstance(db, curID)
		if err != nil {
			return nil, err
		}
		if done {
			break
		}
		curID = nextID
		ids = append(ids, curID)
	}
	if len(ids) == 0 {
		return nil, nil
	}

	return getKeyUsage(viewer, ids)
}

// SizeViewer stores are able to return the size in bytes stored for a given range of Key.
type SizeViewer interface {
GetApproximateSizes(ranges []KeyRange) ([]uint64, error)
Expand All @@ -182,12 +224,12 @@ type SizeViewer interface {
func GetDataSizes(store dvid.Store, instances []dvid.InstanceID) (map[dvid.InstanceID]uint64, error) {
db, ok := store.(OrderedKeyValueGetter)
if !ok {
dvid.Infof("Cannot get data sizes for store %s, which is not an OrderedKeyValueGetter store", db)
dvid.Infof("Cannot get data sizes for store %s, which is not an OrderedKeyValueGetter store\n", db)
return nil, nil
}
sv, ok := db.(SizeViewer)
if !ok {
dvid.Infof("Cannot get data sizes for store %s, which is not an SizeViewer store", db)
dvid.Infof("Cannot get data sizes for store %s, which is not an SizeViewer store\n", db)
return nil, nil
}
// Handle prespecified instance IDs.
Expand Down

0 comments on commit 5d6c0d8

Please sign in to comment.