Skip to content

Commit

Permalink
rpm: implement an RPM filescanner to discern RPM filepaths
Browse files Browse the repository at this point in the history
Using the file paths discovered by the RPM filescanner, we can judge
whether or not a language package has been installed via RPM.

Signed-off-by: crozzy <[email protected]>
  • Loading branch information
crozzy committed Apr 18, 2024
1 parent c930927 commit 9f58c17
Show file tree
Hide file tree
Showing 7 changed files with 392 additions and 103 deletions.
1 change: 1 addition & 0 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ type FileKind string

// Known values of FileKind.
const (
	// FileKindWhiteout is the kind whose string value is "whiteout".
	FileKindWhiteout FileKind = "whiteout"
	// FileKindRPM is the kind whose string value is "rpm".
	// NOTE(review): presumably set on files reported by the rpm FileScanner —
	// confirm against the code that builds the File values.
	FileKindRPM FileKind = "rpm"
)

// File represents interesting files that are found in the layer.
Expand Down
6 changes: 6 additions & 0 deletions linux/coalescer.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ func (c *Coalescer) Coalesce(ctx context.Context, layerArtifacts []*indexer.Laye
for db, pkgs := range tmp {
dbs[db] = pkgs
}
for _, f := range artifacts.Files {
if c.ir.Files == nil {
c.ir.Files = make(map[string][]claircore.File)
}
c.ir.Files[artifacts.Hash.String()] = append(c.ir.Files[artifacts.Hash.String()], f)
}
}

for db, packages := range dbs {
Expand Down
3 changes: 3 additions & 0 deletions rpm/ecosystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem {
RepositoryScanners: func(ctx context.Context) ([]indexer.RepositoryScanner, error) {
return []indexer.RepositoryScanner{}, nil
},
FileScanners: func(ctx context.Context) ([]indexer.FileScanner, error) {
return []indexer.FileScanner{&FileScanner{}}, nil
},
Coalescer: func(ctx context.Context) (indexer.Coalescer, error) {
return linux.NewCoalescer(), nil
},
Expand Down
177 changes: 177 additions & 0 deletions rpm/filescanner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
package rpm

import (
"context"
"fmt"
"io"
"io/fs"
"os"
"path"
"runtime/trace"

"github.com/quay/zlog"

"github.com/quay/claircore"
"github.com/quay/claircore/indexer"
"github.com/quay/claircore/rpm/bdb"
"github.com/quay/claircore/rpm/ndb"
"github.com/quay/claircore/rpm/sqlite"
)

// Identity values returned by the FileScanner's Name, Version and Kind
// methods.
const (
	// scannerName is returned by (*FileScanner).Name.
	scannerName = `rpm`
	// scannerVersion is returned by (*FileScanner).Version.
	scannerVersion = `1`
	// scannerKind is returned by (*FileScanner).Kind.
	scannerKind = `file`
)

// Compile-time check that *FileScanner satisfies [indexer.FileScanner].
var _ indexer.FileScanner = (*FileScanner)(nil)

// Compile-time check that *FileScanner satisfies [indexer.VersionedScanner].
var _ indexer.VersionedScanner = (*FileScanner)(nil)

// FileScanner is the RPM implementation of [indexer.FileScanner]: it examines
// the RPM databases found in a layer and reports the installed files.
//
// The type holds no state, so its zero value is ready to use.
type FileScanner struct{}

// Name implements [indexer.VersionedScanner] and returns scannerName.
func (s *FileScanner) Name() string {
	return scannerName
}

// Version implements [indexer.VersionedScanner] and returns scannerVersion.
func (s *FileScanner) Version() string {
	return scannerVersion
}

// Kind implements [indexer.VersionedScanner] and returns scannerKind.
func (s *FileScanner) Kind() string {
	return scannerKind
}

// Scan reports any found Files that were installed via RPMs in the
// layer.
//
// The layer's filesystem is walked to locate candidate RPM databases; each
// distinct database path is then handed to dbFileToNativeDB and the files it
// returns are accumulated into the result. The first error from any step
// aborts the scan and is returned wrapped.
//
// It's an expected outcome to return (nil, nil) when no RPM databases are
// found in the Layer.
func (s *FileScanner) Scan(ctx context.Context, layer *claircore.Layer) ([]claircore.File, error) {
	if err := ctx.Err(); err != nil {
		return nil, err
	}
	defer trace.StartRegion(ctx, "FileScanner.Scan").End()
	trace.Log(ctx, "layer", layer.Hash.String())
	ctx = zlog.ContextWithValues(ctx,
		"component", "rpm/FileScanner.Scan",
		"version", s.Version(),
		"layer", layer.Hash.String())
	zlog.Debug(ctx).Msg("start")
	defer zlog.Debug(ctx).Msg("done")

	sys, err := layer.FS()
	if err != nil {
		return nil, fmt.Errorf("rpm: unable to open layer: %w", err)
	}

	var found []foundDB
	if err := fs.WalkDir(sys, ".", findDBs(ctx, &found, sys)); err != nil {
		return nil, fmt.Errorf("rpm: error walking fs: %w", err)
	}
	if len(found) == 0 {
		return nil, nil
	}
	// Report the candidate count before examining anything, so the line is
	// present in the log even when a database below fails to load.
	zlog.Debug(ctx).Int("count", len(found)).Msg("found possible databases")

	// done records database paths that were already examined so that a path
	// reported more than once by the walk is only processed once.
	done := make(map[string]struct{}, len(found))
	files := []claircore.File{}

	for _, db := range found {
		ctx := zlog.ContextWithValues(ctx, "db", db.String())
		zlog.Debug(ctx).Msg("examining database")
		if _, ok := done[db.Path]; ok {
			zlog.Debug(ctx).Msg("already seen, skipping")
			continue
		}
		done[db.Path] = struct{}{}
		// Named dbFiles rather than fs to avoid shadowing the io/fs package.
		dbFiles, err := dbFileToNativeDB(ctx, sys, db)
		if err != nil {
			return nil, fmt.Errorf("rpm: error getting native DBs: %w", err)
		}
		files = append(files, dbFiles...)
	}

	return files, nil
}

// dbFileToNativeDB opens the RPM database described by db inside sys, wraps it
// in the nativeDB implementation matching db.Kind, and returns the result of
// filesFromDB for it.
//
// All cleanup is deferred to function return, so the underlying files and
// handles remain open while filesFromDB runs. A database that fails Validate
// is only logged as a warning; its files are still extracted.
//
// It panics if db.Kind is not one of kindSQLite, kindBDB or kindNDB.
func dbFileToNativeDB(ctx context.Context, sys fs.FS, db foundDB) ([]claircore.File, error) {
	var native nativeDB // see native_db.go:/nativeDB

	switch db.Kind {
	case kindSQLite:
		const readErr = "rpm: error reading sqlite db: %w"

		src, err := sys.Open(path.Join(db.Path, `rpmdb.sqlite`))
		if err != nil {
			return nil, fmt.Errorf(readErr, err)
		}
		defer func() {
			if cerr := src.Close(); cerr != nil {
				zlog.Warn(ctx).Err(cerr).Msg("unable to close sqlite db")
			}
		}()

		// sqlite.Open is given a file name, so the database is first copied
		// out of the layer filesystem into a temporary file.
		tmp, err := os.CreateTemp(os.TempDir(), `rpmdb.sqlite.*`)
		if err != nil {
			return nil, fmt.Errorf(readErr, err)
		}
		defer func() {
			// Unlink first, then close; both failures are only logged.
			if rerr := os.Remove(tmp.Name()); rerr != nil {
				zlog.Error(ctx).Err(rerr).Msg("unable to unlink sqlite db")
			}
			if cerr := tmp.Close(); cerr != nil {
				zlog.Warn(ctx).Err(cerr).Msg("unable to close sqlite db")
			}
		}()

		zlog.Debug(ctx).Str("file", tmp.Name()).Msg("copying sqlite db out of FS")
		if _, err := io.Copy(tmp, src); err != nil {
			return nil, fmt.Errorf(readErr, err)
		}
		if err := tmp.Sync(); err != nil {
			return nil, fmt.Errorf(readErr, err)
		}

		handle, err := sqlite.Open(tmp.Name())
		if err != nil {
			return nil, fmt.Errorf(readErr, err)
		}
		defer handle.Close()
		native = handle

	case kindBDB:
		pkgs, err := sys.Open(path.Join(db.Path, `Packages`))
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading bdb db: %w", err)
		}
		defer pkgs.Close()

		at, release, err := mkAt(ctx, db.Kind, pkgs)
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading bdb db: %w", err)
		}
		defer release()

		parsed := new(bdb.PackageDB)
		if err := parsed.Parse(at); err != nil {
			return nil, fmt.Errorf("rpm: error parsing bdb db: %w", err)
		}
		native = parsed

	case kindNDB:
		pkgs, err := sys.Open(path.Join(db.Path, `Packages.db`))
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading ndb db: %w", err)
		}
		defer pkgs.Close()

		at, release, err := mkAt(ctx, db.Kind, pkgs)
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading ndb db: %w", err)
		}
		defer release()

		parsed := new(ndb.PackageDB)
		if err := parsed.Parse(at); err != nil {
			return nil, fmt.Errorf("rpm: error parsing ndb db: %w", err)
		}
		native = parsed

	default:
		panic("programmer error: bad kind: " + db.Kind.String())
	}

	// A validation failure is not fatal: log it and carry on.
	if err := native.Validate(ctx); err != nil {
		zlog.Warn(ctx).Err(err).Msg("rpm: invalid native DB")
	}
	return filesFromDB(ctx, native)
}
73 changes: 73 additions & 0 deletions rpm/filescanner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package rpm

import (
"context"
"testing"

"github.com/quay/zlog"

"github.com/quay/claircore"
"github.com/quay/claircore/test"
)

// testcases lists the layers exercised by TestFileScannerLayer together with
// the number of files the FileScanner is expected to report for each of them.
var testcases = []struct {
// name is used as the subtest name.
name string
// expectedFiles is the exact length expected of the slice returned by Scan.
expectedFiles int
// ref identifies the layer that is loaded from a registry for the subtest.
ref test.LayerRef
}{
{
// NOTE(review): the case is named "python files" but the layer is taken from
// the ubi9/nodejs-18 image — confirm the name is intended.
name: "python files",
expectedFiles: 821,
ref: test.LayerRef{
Registry: "registry.access.redhat.com",
Name: "ubi9/nodejs-18",
Digest: `sha256:1ae06b64755052cef4c32979aded82a18f664c66fa7b50a6d2924afac2849c6e`,
},
},
}

// TestFileScannerLayer runs the FileScanner against every layer listed in
// testcases and checks that the number of reported files matches the
// expectation.
//
// Each layer is loaded from its registry through a cached arena, realized,
// and then scanned. Any failure before the count comparison aborts the
// subtest, so a setup or scan error is not followed by a misleading
// file-count mismatch.
func TestFileScannerLayer(t *testing.T) {
	ctx := zlog.Test(context.Background(), t)
	var s FileScanner
	a := test.NewCachedArena(t)
	t.Cleanup(func() {
		if err := a.Close(ctx); err != nil {
			t.Error(err)
		}
	})

	for _, tt := range testcases {
		t.Run(tt.name, func(t *testing.T) {
			ctx := zlog.Test(ctx, t)
			a.LoadLayerFromRegistry(ctx, t, tt.ref)
			r := a.Realizer(ctx).(*test.CachedRealizer)
			t.Cleanup(func() {
				if err := r.Close(); err != nil {
					t.Error(err)
				}
			})
			ls, err := r.RealizeDescriptions(ctx, []claircore.LayerDescription{
				{
					Digest:    tt.ref.Digest,
					URI:       "http://example.com",
					MediaType: test.MediaType,
					Headers:   make(map[string][]string),
				},
			})
			if err != nil {
				t.Fatal(err)
			}
			// Guard the index below instead of panicking on an empty result.
			if len(ls) == 0 {
				t.Fatal("no layers realized")
			}

			got, err := s.Scan(ctx, &ls[0])
			if err != nil {
				// The result is meaningless on error; stop here.
				t.Fatal(err)
			}

			t.Logf("found %d files", len(got))
			if len(got) != tt.expectedFiles {
				t.Fatalf("expected %d files but got %d", tt.expectedFiles, len(got))
			}
			t.Log(got)
		})
	}
}
Loading

0 comments on commit 9f58c17

Please sign in to comment.