Skip to content

Commit

Permalink
rpm: implement an RPM filescanner to discern RPM filepaths
Browse files Browse the repository at this point in the history
Using the filepaths discovered by the RPM filescanner we can judge
whether or not a language package has been installed via RPM or not.

Signed-off-by: crozzy <[email protected]>
  • Loading branch information
crozzy committed May 22, 2024
1 parent 942e2a0 commit deaae6d
Show file tree
Hide file tree
Showing 7 changed files with 405 additions and 190 deletions.
1 change: 1 addition & 0 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ type FileKind string

const (
FileKindWhiteout = FileKind("whiteout")
FileKindRPM = FileKind("rpm")
)

// File represents interesting files that are found in the layer.
Expand Down
6 changes: 6 additions & 0 deletions linux/coalescer.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ func (c *Coalescer) Coalesce(ctx context.Context, layerArtifacts []*indexer.Laye
for db, pkgs := range tmp {
dbs[db] = pkgs
}
for _, f := range artifacts.Files {
if c.ir.Files == nil {
c.ir.Files = make(map[string][]claircore.File)
}
c.ir.Files[artifacts.Hash.String()] = append(c.ir.Files[artifacts.Hash.String()], f)
}
}

for db, packages := range dbs {
Expand Down
3 changes: 3 additions & 0 deletions rpm/ecosystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem {
RepositoryScanners: func(ctx context.Context) ([]indexer.RepositoryScanner, error) {
return []indexer.RepositoryScanner{}, nil
},
FileScanners: func(ctx context.Context) ([]indexer.FileScanner, error) {
return []indexer.FileScanner{&FileScanner{}}, nil
},
Coalescer: func(ctx context.Context) (indexer.Coalescer, error) {
return linux.NewCoalescer(), nil
},
Expand Down
90 changes: 90 additions & 0 deletions rpm/filescanner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package rpm

import (
"context"
"fmt"
"io/fs"
"runtime/trace"

"github.com/quay/zlog"

"github.com/quay/claircore"
"github.com/quay/claircore/indexer"
)

const (
scannerName = "rpm"
scannerVersion = "1"
scannerKind = "file"
)

var (
_ indexer.FileScanner = (*FileScanner)(nil)
_ indexer.VersionedScanner = (*FileScanner)(nil)
)

// FileScanner implements [indexer.FileScanner], it examines RPM
// databases and reports installed files.
type FileScanner struct{}

// Name implements [indexer.VersionedScanner].
func (*FileScanner) Name() string { return scannerName }

// Version implements [indexer.VersionedScanner].
func (*FileScanner) Version() string { return scannerVersion }

// Kind implements [indexer.VersionedScanner]
func (*FileScanner) Kind() string { return scannerKind }

// Scan reports any found Files that were installed via RPMs in the
// layer.
//
// It's an expected outcome to return (nil, nil) no RPM packages are found in the Layer.
func (s *FileScanner) Scan(ctx context.Context, layer *claircore.Layer) ([]claircore.File, error) {
if err := ctx.Err(); err != nil {
return nil, err
}
defer trace.StartRegion(ctx, "FileScanner.Scan").End()
trace.Log(ctx, "layer", layer.Hash.String())
ctx = zlog.ContextWithValues(ctx,
"component", "rpm/FileScanner.Scan",
"version", s.Version(),
"layer", layer.Hash.String())
zlog.Debug(ctx).Msg("start")
defer zlog.Debug(ctx).Msg("done")

sys, err := layer.FS()
if err != nil {
return nil, fmt.Errorf("rpm: unable to open layer: %w", err)
}

found := make([]foundDB, 0)
if err := fs.WalkDir(sys, ".", findDBs(ctx, &found, sys)); err != nil {
return nil, fmt.Errorf("rpm: error walking fs: %w", err)
}
if len(found) == 0 {
return nil, nil
}

done := map[string]struct{}{}
files := []claircore.File{}

for _, db := range found {
ctx := zlog.ContextWithValues(ctx, "db", db.String())
zlog.Debug(ctx).Msg("examining database")
if _, ok := done[db.Path]; ok {
zlog.Debug(ctx).Msg("already seen, skipping")
continue
}
done[db.Path] = struct{}{}
fs, err := getDBObjects(ctx, sys, db, filesFromDB)
if err != nil {
return nil, fmt.Errorf("rpm: error getting native DBs: %w", err)
}
files = append(files, fs...)
}

zlog.Debug(ctx).Int("count", len(found)).Msg("found possible databases")

return files, nil
}
73 changes: 73 additions & 0 deletions rpm/filescanner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package rpm

import (
"context"
"testing"

"github.com/quay/zlog"

"github.com/quay/claircore"
"github.com/quay/claircore/test"
)

var testcases = []struct {
name string
expectedFiles int
ref test.LayerRef
}{
{
name: "python files",
expectedFiles: 821,
ref: test.LayerRef{
Registry: "registry.access.redhat.com",
Name: "ubi9/nodejs-18",
Digest: `sha256:1ae06b64755052cef4c32979aded82a18f664c66fa7b50a6d2924afac2849c6e`,
},
},
}

func TestFileScannerLayer(t *testing.T) {
ctx := zlog.Test(context.Background(), t)
var s FileScanner
a := test.NewCachedArena(t)
t.Cleanup(func() {
if err := a.Close(ctx); err != nil {
t.Error(err)
}
})

for _, tt := range testcases {
t.Run(tt.name, func(t *testing.T) {
ctx := zlog.Test(ctx, t)
a.LoadLayerFromRegistry(ctx, t, tt.ref)
r := a.Realizer(ctx).(*test.CachedRealizer)
t.Cleanup(func() {
if err := r.Close(); err != nil {
t.Error(err)
}
})
ls, err := r.RealizeDescriptions(ctx, []claircore.LayerDescription{
{
Digest: tt.ref.Digest,
URI: "http://example.com",
MediaType: test.MediaType,
Headers: make(map[string][]string),
},
})
if err != nil {
t.Fatal(err)
}

got, err := s.Scan(ctx, &ls[0])
if err != nil {
t.Error(err)
}

t.Logf("found %d files", len(got))
if len(got) != tt.expectedFiles {
t.Fatalf("expected %d files but got %d", tt.expectedFiles, len(got))
}
t.Log(got)
})
}
}
Loading

0 comments on commit deaae6d

Please sign in to comment.