From f1bafd3d6779ba83165b406e19fce3dc6e19707d Mon Sep 17 00:00:00 2001 From: crozzy Date: Mon, 1 Apr 2024 09:23:35 -0700 Subject: [PATCH] rpm: implement an RPM filescanner to discern RPM filepaths Using the filepaths discovered by the RPM filescanner we can judge whether or not a language package has been installed via RPM or not. Signed-off-by: crozzy --- file.go | 1 + indexer/controller/coalesce.go | 2 +- indexer/controller/controller.go | 2 +- indexreport.go | 2 +- libindex/libindex.go | 1 + linux/coalescer.go | 6 + rpm/ecosystem.go | 3 + rpm/filescanner.go | 85 ++++++++++ rpm/filescanner_test.go | 75 +++++++++ rpm/native_db.go | 275 ++++++++++++++++++++++++++++++- rpm/packagescanner.go | 191 +-------------------- rpm/resolver.go | 56 +++++++ rpm/resolver_test.go | 220 +++++++++++++++++++++++++ whiteout/coalescer.go | 4 +- whiteout/resolver.go | 28 ++-- whiteout/resolver_test.go | 76 +++++---- 16 files changed, 789 insertions(+), 238 deletions(-) create mode 100644 rpm/filescanner.go create mode 100644 rpm/filescanner_test.go create mode 100644 rpm/resolver.go create mode 100644 rpm/resolver_test.go diff --git a/file.go b/file.go index 5c8bd915a..bf154f8d8 100644 --- a/file.go +++ b/file.go @@ -5,6 +5,7 @@ type FileKind string const ( FileKindWhiteout = FileKind("whiteout") + FileKindRPM = FileKind("rpm") ) // File represents interesting files that are found in the layer. diff --git a/indexer/controller/coalesce.go b/indexer/controller/coalesce.go index d7c1e0afc..95af0fc41 100644 --- a/indexer/controller/coalesce.go +++ b/indexer/controller/coalesce.go @@ -128,7 +128,7 @@ func MergeSR(source *claircore.IndexReport, merge []*claircore.IndexReport) *cla } for k, v := range ir.Files { - source.Files[k] = v + source.Files[k] = append(source.Files[k], v...) 
} } return source diff --git a/indexer/controller/controller.go b/indexer/controller/controller.go index a39103582..a37b6b51e 100644 --- a/indexer/controller/controller.go +++ b/indexer/controller/controller.go @@ -41,7 +41,7 @@ func New(options *indexer.Options) *Controller { Environments: map[string][]*claircore.Environment{}, Distributions: map[string]*claircore.Distribution{}, Repositories: map[string]*claircore.Repository{}, - Files: map[string]claircore.File{}, + Files: map[string][]claircore.File{}, } s := &Controller{ diff --git a/indexreport.go b/indexreport.go index cf3e8ba37..4e7628e3c 100644 --- a/indexreport.go +++ b/indexreport.go @@ -34,7 +34,7 @@ type IndexReport struct { // an error string in the case the index did not succeed Err string `json:"err"` // Files doesn't end up in the json report but needs to be available at post-coalesce - Files map[string]File `json:"-"` + Files map[string][]File `json:"-"` } // IndexRecords returns a list of IndexRecords derived from the IndexReport diff --git a/libindex/libindex.go b/libindex/libindex.go index b9e8874a1..b5299fc84 100644 --- a/libindex/libindex.go +++ b/libindex/libindex.go @@ -110,6 +110,7 @@ func New(ctx context.Context, opts *Options, cl *http.Client) (*Libindex, error) opts.Ecosystems = append(opts.Ecosystems, whiteout.NewEcosystem(ctx)) opts.Resolvers = []indexer.Resolver{ &whiteout.Resolver{}, + &rpm.Resolver{}, } if cl == nil { diff --git a/linux/coalescer.go b/linux/coalescer.go index 0344ec285..1795a4863 100644 --- a/linux/coalescer.go +++ b/linux/coalescer.go @@ -58,6 +58,12 @@ func (c *Coalescer) Coalesce(ctx context.Context, layerArtifacts []*indexer.Laye for db, pkgs := range tmp { dbs[db] = pkgs } + for _, f := range artifacts.Files { + if c.ir.Files == nil { + c.ir.Files = make(map[string][]claircore.File) + } + c.ir.Files[artifacts.Hash.String()] = append(c.ir.Files[artifacts.Hash.String()], f) + } } for db, packages := range dbs { diff --git a/rpm/ecosystem.go b/rpm/ecosystem.go index 9ee224b5f..e6691ff70 100644 --- a/rpm/ecosystem.go +++ b/rpm/ecosystem.go @@ -28,6 +28,9 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem { RepositoryScanners: func(ctx context.Context) ([]indexer.RepositoryScanner, error) { return []indexer.RepositoryScanner{}, nil }, + FileScanners: func(ctx context.Context) ([]indexer.FileScanner, error) { + return []indexer.FileScanner{&FileScanner{}}, nil + }, Coalescer: func(ctx context.Context) (indexer.Coalescer, error) { return linux.NewCoalescer(), nil }, diff --git a/rpm/filescanner.go b/rpm/filescanner.go new file mode 100644 index 000000000..dba475823 --- /dev/null +++ b/rpm/filescanner.go @@ -0,0 +1,85 @@ +package rpm + +import ( + "context" + "fmt" + "io/fs" + "runtime/trace" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/indexer" +) + +const ( + scannerName = "rpm" + scannerVersion = "1" + scannerKind = "file" +) + +var ( + _ indexer.FileScanner = (*FileScanner)(nil) + _ indexer.VersionedScanner = (*FileScanner)(nil) +) + +type FileScanner struct{} + +func (*FileScanner) Name() string { return scannerName } + +func (*FileScanner) Version() string { return scannerVersion } + +func (*FileScanner) Kind() string { return scannerKind } + +func (s *FileScanner) Scan(ctx context.Context, layer *claircore.Layer) ([]claircore.File, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + defer trace.StartRegion(ctx, "FileScanner.Scan").End() + trace.Log(ctx, "layer", layer.Hash.String()) + ctx = zlog.ContextWithValues(ctx, 
+ "component", "rpm/FileScanner.Scan", + "version", s.Version(), + "layer", layer.Hash.String()) + zlog.Debug(ctx).Msg("start") + defer zlog.Debug(ctx).Msg("done") + + sys, err := layer.FS() + if err != nil { + return nil, fmt.Errorf("rpm: unable to open layer: %w", err) + } + + found := make([]foundDB, 0) + if err := fs.WalkDir(sys, ".", findDBs(ctx, &found, sys)); err != nil { + return nil, fmt.Errorf("rpm: error walking fs: %w", err) + } + if len(found) == 0 { + return nil, nil + } + + done := map[string]struct{}{} + files := []claircore.File{} + + for _, db := range found { + ctx := zlog.ContextWithValues(ctx, "db", db.String()) + zlog.Debug(ctx).Msg("examining database") + if _, ok := done[db.Path]; ok { + zlog.Debug(ctx).Msg("already seen, skipping") + continue + } + done[db.Path] = struct{}{} + nat, err := dbFileToNativeDB(ctx, sys, db) + if err != nil { + return nil, fmt.Errorf("rpm: error getting native DBs: %w", err) + } + fs, err := filesFromDB(ctx, nat) + if err != nil { + return nil, fmt.Errorf("rpm: error getting files from native DBs: %w", err) + } + files = append(files, fs...) + } + + zlog.Debug(ctx).Int("count", len(found)).Msg("found possible databases") + + return files, nil +} diff --git a/rpm/filescanner_test.go b/rpm/filescanner_test.go new file mode 100644 index 000000000..3a2da02ff --- /dev/null +++ b/rpm/filescanner_test.go @@ -0,0 +1,75 @@ +package rpm + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/test" +) + +var testcases = []struct { + name string + filename string + expectedFiles int +}{ + { + name: "java layer", + filename: "cdc13a947214994058941dee5dab876369896ec672defa07694cec6dd3fc7ca2", + expectedFiles: 82, + }, + { + name: "open jdk layer", + filename: "f68995d3d7382737a1ee41fb69ca9369693173dba4263233621f4defcb29c4bd", + expectedFiles: 218, + }, +} + +func TestFileScannerLayer(t *testing.T) { + ctx := context.Background() + var s FileScanner + desc := claircore.LayerDescription{ + Digest: test.RandomSHA256Digest(t).String(), + URI: "file:///dev/null", + MediaType: test.MediaType, + Headers: make(map[string][]string), + } + + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f, err := os.Open(filepath.Join(`testdata/layers`, tt.filename)) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + if err := f.Close(); err != nil { + t.Error(err) + } + }) + var l claircore.Layer + if err := l.Init(ctx, &desc, f); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { + if err := l.Close(); err != nil { + t.Error(err) + } + }) + + got, err := s.Scan(ctx, &l) + if err != nil { + t.Error(err) + } + t.Logf("found %d files", len(got)) + if len(got) != tt.expectedFiles { + t.Fatalf("expected %d files but got %d", tt.expectedFiles, len(got)) + } + t.Log(got) + }) + } +} diff --git a/rpm/native_db.go b/rpm/native_db.go index 1cc43f72e..54aa128a7 100644 --- a/rpm/native_db.go +++ b/rpm/native_db.go @@ -5,6 +5,11 @@ import ( "context" "fmt" "io" + "io/fs" + "os" + "path" + "path/filepath" + "regexp" "runtime/trace" "strings" @@ -12,7 +17,10 @@ import ( "golang.org/x/crypto/openpgp/packet" "github.com/quay/claircore" + "github.com/quay/claircore/rpm/bdb" "github.com/quay/claircore/rpm/internal/rpm" + "github.com/quay/claircore/rpm/ndb" + "github.com/quay/claircore/rpm/sqlite" ) // NativeDB is the interface implemented for in-process RPM database handlers. 
@@ -21,6 +29,219 @@ type nativeDB interface { Validate(context.Context) error } +// TODO: docs +type dbKind uint + +//go:generate -command stringer go run golang.org/x/tools/cmd/stringer +//go:generate stringer -linecomment -type dbKind + +const ( + _ dbKind = iota + + kindBDB // bdb + kindSQLite // sqlite + kindNDB // ndb +) + +// TODO: docs +type foundDB struct { + Path string + Kind dbKind +} + +func (f foundDB) String() string { + return f.Kind.String() + ":" + f.Path +} + +// TODO: Docs +func dbFileToNativeDB(ctx context.Context, sys fs.FS, db foundDB) (nativeDB, error) { + var nat nativeDB // see native_db.go:/nativeDB + switch db.Kind { + case kindSQLite: + r, err := sys.Open(path.Join(db.Path, `rpmdb.sqlite`)) + if err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + defer func() { + if err := r.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db") + } + }() + f, err := os.CreateTemp(os.TempDir(), `rpmdb.sqlite.*`) + if err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + defer func() { + if err := os.Remove(f.Name()); err != nil { + zlog.Error(ctx).Err(err).Msg("unable to unlink sqlite db") + } + if err := f.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db") + } + }() + zlog.Debug(ctx).Str("file", f.Name()).Msg("copying sqlite db out of FS") + if _, err := io.Copy(f, r); err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + if err := f.Sync(); err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + sdb, err := sqlite.Open(f.Name()) + if err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + defer sdb.Close() + nat = sdb + case kindBDB: + f, err := sys.Open(path.Join(db.Path, `Packages`)) + if err != nil { + return nil, fmt.Errorf("rpm: error reading bdb db: %w", err) + } + defer f.Close() + r, done, err := mkAt(ctx, db.Kind, f) + if err != nil { + return nil, fmt.Errorf("rpm: error reading bdb db: %w", err) + } + defer done() + var bpdb bdb.PackageDB + if err := bpdb.Parse(r); err != nil { + return nil, fmt.Errorf("rpm: error parsing bdb db: %w", err) + } + nat = &bpdb + case kindNDB: + f, err := sys.Open(path.Join(db.Path, `Packages.db`)) + if err != nil { + return nil, fmt.Errorf("rpm: error reading ndb db: %w", err) + } + defer f.Close() + r, done, err := mkAt(ctx, db.Kind, f) + if err != nil { + return nil, fmt.Errorf("rpm: error reading ndb db: %w", err) + } + defer done() + var npdb ndb.PackageDB + if err := npdb.Parse(r); err != nil { + return nil, fmt.Errorf("rpm: error parsing ndb db: %w", err) + } + nat = &npdb + default: + panic("programmer error: bad kind: " + db.Kind.String()) + } + if err := nat.Validate(ctx); err != nil { + zlog.Warn(ctx). + Err(err). 
+ Msg("rpm: invalid native DB") + } + return nat, nil +} + +func findDBs(ctx context.Context, out *[]foundDB, sys fs.FS) fs.WalkDirFunc { + return func(p string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + + dir, n := path.Split(p) + dir = path.Clean(dir) + switch n { + case `Packages`: + f, err := sys.Open(p) + if err != nil { + return err + } + ok := bdb.CheckMagic(ctx, f) + f.Close() + if !ok { + return nil + } + *out = append(*out, foundDB{ + Path: dir, + Kind: kindBDB, + }) + case `rpmdb.sqlite`: + *out = append(*out, foundDB{ + Path: dir, + Kind: kindSQLite, + }) + case `Packages.db`: + f, err := sys.Open(p) + if err != nil { + return err + } + ok := ndb.CheckMagic(ctx, f) + f.Close() + if !ok { + return nil + } + *out = append(*out, foundDB{ + Path: dir, + Kind: kindNDB, + }) + } + return nil + } +} + +func mkAt(ctx context.Context, k dbKind, f fs.File) (io.ReaderAt, func(), error) { + if r, ok := f.(io.ReaderAt); ok { + return r, func() {}, nil + } + spool, err := os.CreateTemp(os.TempDir(), `Packages.`+k.String()+`.`) + if err != nil { + return nil, nil, fmt.Errorf("rpm: error spooling db: %w", err) + } + ctx = zlog.ContextWithValues(ctx, "file", spool.Name()) + if err := os.Remove(spool.Name()); err != nil { + zlog.Error(ctx).Err(err).Msg("unable to remove spool; file leaked!") + } + zlog.Debug(ctx). + Msg("copying db out of fs.FS") + if _, err := io.Copy(spool, f); err != nil { + if err := spool.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close spool") + } + return nil, nil, fmt.Errorf("rpm: error spooling db: %w", err) + } + return spool, closeSpool(ctx, spool), nil +} + +func closeSpool(ctx context.Context, f *os.File) func() { + return func() { + if err := f.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close spool") + } + } +} + +// TODO: docs +func filesFromDB(ctx context.Context, db nativeDB) ([]claircore.File, error) { + rds, err := db.AllHeaders(ctx) + if err != nil { + return nil, fmt.Errorf("rpm: error reading headers: %w", err) + } + fs := []claircore.File{} + for _, rd := range rds { + var h rpm.Header + if err := h.Parse(ctx, rd); err != nil { + return nil, err + } + var info Info + if err := info.Load(ctx, &h); err != nil { + return nil, err + } + for _, f := range info.Filenames { + fs = append(fs, claircore.File{ + Kind: claircore.FileKindRPM, + Path: f, + }) + } + } + return fs, nil +} + // PackagesFromDB extracts the packages from the RPM headers provided by // the database. func packagesFromDB(ctx context.Context, pkgdb string, db nativeDB) ([]*claircore.Package, error) { @@ -121,7 +342,8 @@ type Info struct { Module string Arch string Digest string - Signature []byte // This is a PGP signature packet. + Signature []byte // This is a PGP signature packet. + Filenames []string // Filtered by the [filePatterns] regexp. DigestAlgo int Epoch int } @@ -129,6 +351,8 @@ type Info struct { // Load populates the receiver with information extracted from the provided // [rpm.Header]. 
func (i *Info) Load(ctx context.Context, h *rpm.Header) error { + var dirname, basename []string + var dirindex []int32 for idx := range h.Infos { e := &h.Infos[idx] if _, ok := wantTags[e.Tag]; !ok { @@ -159,14 +383,63 @@ func (i *Info) Load(ctx context.Context, h *rpm.Header) error { i.Digest = v.([]string)[0] case rpm.TagSigPGP: i.Signature = v.([]byte) + case rpm.TagDirnames: + dirname = v.([]string) + case rpm.TagDirindexes: + dirindex = v.([]int32) + case rpm.TagBasenames: + basename = v.([]string) + case rpm.TagFilenames: + // Filenames is the tag used in rpm4 -- this is a best-effort for + // supporting it. + for _, name := range v.([]string) { + if !filePatterns.MatchString(name) { + continue + } + i.Filenames = append(i.Filenames, name) + } } } + + // Catch panics from malformed headers. Can't think of a better way to + // handle this. + defer func() { + if r := recover(); r == nil { + return + } + zlog.Warn(ctx). + Str("name", i.Name). + Strs("basename", basename). + Strs("dirname", dirname). + Ints32("dirindex", dirindex). + Msg("caught panic in filename construction") + i.Filenames = nil + }() + for j := range basename { + name := filepath.Join(dirname[dirindex[j]], basename[j]) + if !filePatterns.MatchString(name) { + continue + } + // Record the name as a relative path, as that's what we use everywhere + // else. + i.Filenames = append(i.Filenames, name[1:]) + } return nil } +// FilePatterns is a regular expression for *any* file that may need to be +// recorded alongside a package. +// +// The tested strings are absolute paths. +var filePatterns = regexp.MustCompile(`^.*/[^/]+\.jar$|^/usr/lib/python[23]\.[0-9]+/site-packages/[^/]+\.egg-info/PKG-INFO$`) + var wantTags = map[rpm.Tag]struct{}{ rpm.TagArch: {}, + rpm.TagBasenames: {}, + rpm.TagDirindexes: {}, + rpm.TagDirnames: {}, rpm.TagEpoch: {}, + rpm.TagFilenames: {}, rpm.TagModularityLabel: {}, rpm.TagName: {}, rpm.TagPayloadDigest: {}, diff --git a/rpm/packagescanner.go b/rpm/packagescanner.go index 0d48cf9f5..67ba9c359 100644 --- a/rpm/packagescanner.go +++ b/rpm/packagescanner.go @@ -4,19 +4,13 @@ package rpm import ( "context" "fmt" - "io" "io/fs" - "os" - "path" "runtime/trace" "github.com/quay/zlog" "github.com/quay/claircore" "github.com/quay/claircore/indexer" - "github.com/quay/claircore/rpm/bdb" - "github.com/quay/claircore/rpm/ndb" - "github.com/quay/claircore/rpm/sqlite" ) const ( @@ -89,84 +83,9 @@ func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*clairco continue } done[db.Path] = struct{}{} - - var nat nativeDB // see native_db.go:/nativeDB - switch db.Kind { - case kindSQLite: - r, err := sys.Open(path.Join(db.Path, `rpmdb.sqlite`)) - if err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - defer func() { - if err := r.Close(); err != nil { - zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db") - } - }() - f, err := os.CreateTemp(os.TempDir(), `rpmdb.sqlite.*`) - if err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - defer func() { - if err := os.Remove(f.Name()); err != nil { - zlog.Error(ctx).Err(err).Msg("unable to unlink sqlite db") - } - if err := f.Close(); err != nil { - zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db") - } - }() - zlog.Debug(ctx).Str("file", f.Name()).Msg("copying sqlite db out of FS") - if _, err := io.Copy(f, r); err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - if err := f.Sync(); err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: 
%w", err) - } - sdb, err := sqlite.Open(f.Name()) - if err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - defer sdb.Close() - nat = sdb - case kindBDB: - f, err := sys.Open(path.Join(db.Path, `Packages`)) - if err != nil { - return nil, fmt.Errorf("rpm: error reading bdb db: %w", err) - } - defer f.Close() - r, done, err := mkAt(ctx, db.Kind, f) - if err != nil { - return nil, fmt.Errorf("rpm: error reading bdb db: %w", err) - } - defer done() - var bpdb bdb.PackageDB - if err := bpdb.Parse(r); err != nil { - return nil, fmt.Errorf("rpm: error parsing bdb db: %w", err) - } - nat = &bpdb - case kindNDB: - f, err := sys.Open(path.Join(db.Path, `Packages.db`)) - if err != nil { - return nil, fmt.Errorf("rpm: error reading ndb db: %w", err) - } - defer f.Close() - r, done, err := mkAt(ctx, db.Kind, f) - if err != nil { - return nil, fmt.Errorf("rpm: error reading ndb db: %w", err) - } - defer done() - var npdb ndb.PackageDB - if err := npdb.Parse(r); err != nil { - return nil, fmt.Errorf("rpm: error parsing ndb db: %w", err) - } - nat = &npdb - default: - panic("programmer error: bad kind: " + db.Kind.String()) - } - if err := nat.Validate(ctx); err != nil { - zlog.Warn(ctx). - Err(err). - Msg("rpm: invalid native DB") - continue + nat, err := dbFileToNativeDB(ctx, sys, db) + if err != nil { + return nil, fmt.Errorf("rpm: error getting native DBs: %w", err) } ps, err := packagesFromDB(ctx, db.String(), nat) if err != nil { @@ -174,109 +93,5 @@ func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*clairco } pkgs = append(pkgs, ps...) } - return pkgs, nil } - -func findDBs(ctx context.Context, out *[]foundDB, sys fs.FS) fs.WalkDirFunc { - return func(p string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - if d.IsDir() { - return nil - } - - dir, n := path.Split(p) - dir = path.Clean(dir) - switch n { - case `Packages`: - f, err := sys.Open(p) - if err != nil { - return err - } - ok := bdb.CheckMagic(ctx, f) - f.Close() - if !ok { - return nil - } - *out = append(*out, foundDB{ - Path: dir, - Kind: kindBDB, - }) - case `rpmdb.sqlite`: - *out = append(*out, foundDB{ - Path: dir, - Kind: kindSQLite, - }) - case `Packages.db`: - f, err := sys.Open(p) - if err != nil { - return err - } - ok := ndb.CheckMagic(ctx, f) - f.Close() - if !ok { - return nil - } - *out = append(*out, foundDB{ - Path: dir, - Kind: kindNDB, - }) - } - return nil - } -} - -func mkAt(ctx context.Context, k dbKind, f fs.File) (io.ReaderAt, func(), error) { - if r, ok := f.(io.ReaderAt); ok { - return r, func() {}, nil - } - spool, err := os.CreateTemp(os.TempDir(), `Packages.`+k.String()+`.`) - if err != nil { - return nil, nil, fmt.Errorf("rpm: error spooling db: %w", err) - } - ctx = zlog.ContextWithValues(ctx, "file", spool.Name()) - if err := os.Remove(spool.Name()); err != nil { - zlog.Error(ctx).Err(err).Msg("unable to remove spool; file leaked!") - } - zlog.Debug(ctx). 
- Msg("copying db out of fs.FS") - if _, err := io.Copy(spool, f); err != nil { - if err := spool.Close(); err != nil { - zlog.Warn(ctx).Err(err).Msg("unable to close spool") - } - return nil, nil, fmt.Errorf("rpm: error spooling db: %w", err) - } - return spool, closeSpool(ctx, spool), nil -} - -func closeSpool(ctx context.Context, f *os.File) func() { - return func() { - if err := f.Close(); err != nil { - zlog.Warn(ctx).Err(err).Msg("unable to close spool") - } - } -} - -type dbKind uint - -//go:generate -command stringer go run golang.org/x/tools/cmd/stringer -//go:generate stringer -linecomment -type dbKind - -const ( - _ dbKind = iota - - kindBDB // bdb - kindSQLite // sqlite - kindNDB // ndb -) - -type foundDB struct { - Path string - Kind dbKind -} - -func (f foundDB) String() string { - return f.Kind.String() + ":" + f.Path -} diff --git a/rpm/resolver.go b/rpm/resolver.go new file mode 100644 index 000000000..9479cd21d --- /dev/null +++ b/rpm/resolver.go @@ -0,0 +1,56 @@ +package rpm + +import ( + "context" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/indexer" +) + +var ( + _ indexer.Resolver = (*Resolver)(nil) +) + +type Resolver struct{} + +func (r *Resolver) Resolve(ctx context.Context, ir *claircore.IndexReport, layers []*claircore.Layer) *claircore.IndexReport { + finalPackages := map[string]*claircore.Package{} + finalEnvironments := map[string][]*claircore.Environment{} + for pkgID, pkg := range ir.Packages { + isRPMPackage := false + for i := 0; i < len(ir.Environments[pkgID]); i++ { + if ir.Environments[pkgID][i].RepositoryIDs != nil { + for _, rID := range ir.Environments[pkgID][i].RepositoryIDs { + r := ir.Repositories[rID] + if r.Key == "rhel-cpe-repository" { + isRPMPackage = true + goto found + } + } + } + } + found: + isRemovable := false + for _, fs := range ir.Files { + for _, f := range fs { + if f.Kind == claircore.FileKindRPM && !isRPMPackage && pkg.Filepath == f.Path { + isRemovable = true + zlog.Debug(ctx). + Str("package name", pkg.Name). + Str("package file", pkg.Filepath). + Str("rpm file", f.Path). 
+ Msg("package determined to have come from RPM, deleting") + } + } + } + if !isRemovable { + finalPackages[pkgID] = pkg + finalEnvironments[pkgID] = ir.Environments[pkgID] + } + } + ir.Packages = finalPackages + ir.Environments = finalEnvironments + return ir +} diff --git a/rpm/resolver_test.go b/rpm/resolver_test.go new file mode 100644 index 000000000..3d1a225b0 --- /dev/null +++ b/rpm/resolver_test.go @@ -0,0 +1,220 @@ +package rpm + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/pkg/cpe" +) + +var resolverTestcases = []struct { + name string + expectedPackages int + indexReport *claircore.IndexReport +}{ + { + name: "No files", + expectedPackages: 2, + indexReport: &claircore.IndexReport{ + Hash: claircore.MustParseDigest(`sha256:` + strings.Repeat(`a`, 64)), + Packages: map[string]*claircore.Package{ + "123": { + ID: "123", + Name: "package A", + Version: "v1.0.0", + Source: &claircore.Package{ + ID: "122", + Name: "package B source", + Kind: claircore.SOURCE, + Version: "v1.0.0", + }, + Kind: claircore.BINARY, + }, + "456": { + ID: "456", + Name: "package B", + Version: "v2.0.0", + Kind: claircore.BINARY, + }, + }, + Environments: map[string][]*claircore.Environment{ + "123": { + { + PackageDB: "bdb:var/lib/rpm", + IntroducedIn: claircore.MustParseDigest(`sha256:` + strings.Repeat(`b`, 64)), + RepositoryIDs: []string{"11"}, + DistributionID: "13", + }, + }, + "456": { + { + PackageDB: "maven:opt/couchbase/lib/cbas/repo/eventstream-1.0.1.jar", + IntroducedIn: claircore.MustParseDigest(`sha256:` + strings.Repeat(`c`, 64)), + RepositoryIDs: []string{"12"}, + }, + }, + }, + Repositories: map[string]*claircore.Repository{ + "11": { + ID: "11", + Name: "cpe:/a:redhat:rhel_eus:8.6::appstream", + Key: "rhel-cpe-repository", + CPE: cpe.MustUnbind("cpe:2.3:a:redhat:rhel_eus:8.6:*:appstream:*:*:*:*:*"), + }, + "12": { + ID: "12", + Name: "maven", + URI: "https://repo1.maven.apache.org/maven2", + }, + }, + Distributions: map[string]*claircore.Distribution{ + "13": { + ID: "13", + DID: "rhel", + Name: "Red Hat Enterprise Linux Server", + Version: "7", + VersionID: "7", + CPE: cpe.MustUnbind("cpe:2.3:o:redhat:enterprise_linux:7:*:*:*:*:*:*:*"), + PrettyName: "Red Hat Enterprise Linux Server 7", + }, + }, + Success: true, + }, + }, + { + name: "an RPM and a native JAVA package", + expectedPackages: 1, + indexReport: &claircore.IndexReport{ + Hash: claircore.MustParseDigest(`sha256:` + strings.Repeat(`a`, 64)), + Packages: map[string]*claircore.Package{ + "123": { + ID: "123", + Name: "rpm java package A", + Version: "v2.0.0-1-1", + Source: &claircore.Package{ + ID: "122", + Name: "rpm java package A source", + Kind: claircore.SOURCE, + Version: "v2.0.0-1-1", + }, + Kind: claircore.BINARY, + Filepath: "some/rpm/filepath.rpm", + }, + "456": { + ID: "456", + Name: "java package A", + Version: "v2.0.0", + Kind: claircore.BINARY, + Filepath: "an/actual/rpm/filepath.java", + }, + }, + Files: map[string][]claircore.File{ + "111": { + {Kind: claircore.FileKindRPM, Path: "some/rpm/filepath/one.java"}, + {Kind: claircore.FileKindRPM, Path: "some/rpm/filepath/two.java"}, + {Kind: claircore.FileKindRPM, Path: "an/actual/rpm/filepath.java"}, + }, + }, + Environments: map[string][]*claircore.Environment{ + "123": { + { + PackageDB: "bdb:var/lib/rpm", + IntroducedIn: claircore.MustParseDigest(`sha256:` + strings.Repeat(`b`, 64)), + RepositoryIDs: []string{"11"}, + DistributionID: 
"13", + }, + }, + "456": { + { + PackageDB: "maven:opt/couchbase/lib/cbas/repo/eventstream-1.0.1.jar", + IntroducedIn: claircore.MustParseDigest(`sha256:` + strings.Repeat(`c`, 64)), + RepositoryIDs: []string{"12"}, + }, + }, + }, + Repositories: map[string]*claircore.Repository{ + "11": { + ID: "11", + Name: "cpe:/a:redhat:rhel_eus:8.6::appstream", + Key: "rhel-cpe-repository", + CPE: cpe.MustUnbind("cpe:2.3:a:redhat:rhel_eus:8.6:*:appstream:*:*:*:*:*"), + }, + "12": { + ID: "12", + Name: "maven", + URI: "https://repo1.maven.apache.org/maven2", + }, + }, + Distributions: map[string]*claircore.Distribution{ + "13": { + ID: "13", + DID: "rhel", + Name: "Red Hat Enterprise Linux Server", + Version: "7", + VersionID: "7", + CPE: cpe.MustUnbind("cpe:2.3:o:redhat:enterprise_linux:7:*:*:*:*:*:*:*"), + PrettyName: "Red Hat Enterprise Linux Server 7", + }, + }, + Success: true, + }, + }, +} + +func TestResolver(t *testing.T) { + ctx := context.Background() + for _, tt := range resolverTestcases { + t.Run(tt.name, func(t *testing.T) { + r := &Resolver{} + ir := r.Resolve(ctx, tt.indexReport, nil) + if len(ir.Packages) != tt.expectedPackages { + t.Errorf("expected %d packages but got %d", tt.expectedPackages, len(ir.Packages)) + } + }) + } +} + +var indexReportTestcases = []struct { + name string + filename string + expectedPackages int +}{ + { + name: "open jdk 18 index report", + filename: "openjdk_18.json", + expectedPackages: 218, + }, +} + +func TestIndexReport(t *testing.T) { + ctx := context.Background() + var r Resolver + for _, tt := range indexReportTestcases { + t.Run(tt.name, func(t *testing.T) { + ctx := zlog.Test(ctx, t) + fp := filepath.Join(`testdata/index-report/`, tt.filename) + v, err := os.ReadFile(fp) + if err != nil { + t.Fatal(err) + } + ir := &claircore.IndexReport{} + if err = json.Unmarshal(v, ir); err != nil { + t.Fatalf("error opening file %q: %v", fp, err) + } + + got := r.Resolve(ctx, ir, nil) + t.Logf("found %d files", len(got.Packages)) + if len(got.Packages) != tt.expectedPackages { + t.Fatalf("expected %d packages but got %d", tt.expectedPackages, len(got.Packages)) + } + t.Log(got) + }) + } +} diff --git a/whiteout/coalescer.go b/whiteout/coalescer.go index 0e7a3cac0..4e2c5fba5 100644 --- a/whiteout/coalescer.go +++ b/whiteout/coalescer.go @@ -14,9 +14,9 @@ func (c *coalescer) Coalesce(ctx context.Context, layerArtifacts []*indexer.Laye for _, l := range layerArtifacts { for _, f := range l.Files { if ir.Files == nil { - ir.Files = make(map[string]claircore.File) + ir.Files = make(map[string][]claircore.File) } - ir.Files[l.Hash.String()] = f + ir.Files[l.Hash.String()] = append(ir.Files[l.Hash.String()], f) } } return ir, nil diff --git a/whiteout/resolver.go b/whiteout/resolver.go index d98cac5f1..e05f42ea0 100644 --- a/whiteout/resolver.go +++ b/whiteout/resolver.go @@ -32,19 +32,21 @@ func (r *Resolver) Resolve(ctx context.Context, ir *claircore.IndexReport, layer packageLayer = ir.Environments[pkgID][i].IntroducedIn.String() } } - for fileLayer, f := range ir.Files { - // Check if it's a whiteout file, if it applies to the package's - // filepath and if the layer the whiteout file came from came after. - // The spec states: "Whiteout files MUST only apply to resources in - // lower/parent layers" hence why we don't check if they're in the same - // layer. - if f.Kind == claircore.FileKindWhiteout && ls.isChildOf(fileLayer, packageLayer) && fileIsDeleted(pkg.Filepath, f.Path) { - packageDeleted = true - zlog.Debug(ctx). - Str("package name", pkg.Name). 
- Str("package file", pkg.Filepath). - Str("whiteout file", f.Path). - Msg("package determined to be deleted") + for fileLayer, fs := range ir.Files { + for _, f := range fs { + // Check if it's a whiteout file, if it applies to the package's + // filepath and if the layer the whiteout file came from came after. + // The spec states: "Whiteout files MUST only apply to resources in + // lower/parent layers" hence why we don't check if they're in the same + // layer. + if f.Kind == claircore.FileKindWhiteout && ls.isChildOf(fileLayer, packageLayer) && fileIsDeleted(pkg.Filepath, f.Path) { + packageDeleted = true + zlog.Debug(ctx). + Str("package name", pkg.Name). + Str("package file", pkg.Filepath). + Str("whiteout file", f.Path). + Msg("package determined to be deleted") + } } } if !packageDeleted { diff --git a/whiteout/resolver_test.go b/whiteout/resolver_test.go index 0212435ad..342a5f9c3 100644 --- a/whiteout/resolver_test.go +++ b/whiteout/resolver_test.go @@ -51,10 +51,12 @@ func TestResolver(t *testing.T) { "1": {{IntroducedIn: firstLayerHash}}, "2": {{IntroducedIn: firstLayerHash}}, }, - Files: map[string]claircore.File{ + Files: map[string][]claircore.File{ secondLayerHash.String(): { - Path: "a/path/to/some/file/site-packages/.wh.a_package", - Kind: claircore.FileKindWhiteout, + { + Path: "a/path/to/some/file/site-packages/.wh.a_package", + Kind: claircore.FileKindWhiteout, + }, }, }, }, @@ -82,18 +84,20 @@ func TestResolver(t *testing.T) { "1": {{IntroducedIn: firstLayerHash}}, "2": {{IntroducedIn: firstLayerHash}}, }, - Files: map[string]claircore.File{ + Files: map[string][]claircore.File{ secondLayerHash.String(): { - Path: "a/path/to/some/different_file/site-packages/.wh.a_package", - Kind: claircore.FileKindWhiteout, - }, - secondLayerHash.String(): { - Path: "a/path/to/some/different_file/.wh.site-packages", - Kind: claircore.FileKindWhiteout, - }, - secondLayerHash.String(): { - Path: "a/path/to/some/.wh.different_file", - Kind: claircore.FileKindWhiteout, + { + Path: "a/path/to/some/different_file/site-packages/.wh.a_package", + Kind: claircore.FileKindWhiteout, + }, + { + Path: "a/path/to/some/different_file/.wh.site-packages", + Kind: claircore.FileKindWhiteout, + }, + { + Path: "a/path/to/some/.wh.different_file", + Kind: claircore.FileKindWhiteout, + }, }, }, }, @@ -121,10 +125,12 @@ func TestResolver(t *testing.T) { "1": {{IntroducedIn: firstLayerHash}}, "2": {{IntroducedIn: firstLayerHash}}, }, - Files: map[string]claircore.File{ + Files: map[string][]claircore.File{ secondLayerHash.String(): { - Path: "a/path/to/some/file/.wh.site-packages", - Kind: claircore.FileKindWhiteout, + { + Path: "a/path/to/some/file/.wh.site-packages", + Kind: claircore.FileKindWhiteout, + }, }, }, }, @@ -152,10 +158,12 @@ func TestResolver(t *testing.T) { "1": {{IntroducedIn: firstLayerHash}}, "2": {{IntroducedIn: firstLayerHash}}, }, - Files: map[string]claircore.File{ - firstLayerHash.String(): { // whiteout is in the same layer as packages - Path: "a/path/to/some/file/site-packages/.wh.b_package", - Kind: claircore.FileKindWhiteout, + Files: map[string][]claircore.File{ + firstLayerHash.String(): { + { // whiteout is in the same layer as packages + Path: "a/path/to/some/file/site-packages/.wh.b_package", + Kind: claircore.FileKindWhiteout, + }, }, }, }, @@ -183,10 +191,12 @@ func TestResolver(t *testing.T) { "1": {{IntroducedIn: firstLayerHash}}, "2": {{IntroducedIn: firstLayerHash}}, }, - Files: map[string]claircore.File{ + Files: map[string][]claircore.File{ 
secondLayerHash.String(): { - Path: "a/path/to/some/file/site/.wh..wh..opq", - Kind: claircore.FileKindWhiteout, + { + Path: "a/path/to/some/file/site/.wh..wh..opq", + Kind: claircore.FileKindWhiteout, + }, }, }, }, @@ -214,10 +224,12 @@ func TestResolver(t *testing.T) { "1": {{IntroducedIn: firstLayerHash}}, "2": {{IntroducedIn: firstLayerHash}}, }, - Files: map[string]claircore.File{ + Files: map[string][]claircore.File{ secondLayerHash.String(): { - Path: "a/path/to/some/file/site-packages/.wh..wh..opq", - Kind: claircore.FileKindWhiteout, + { + Path: "a/path/to/some/file/site-packages/.wh..wh..opq", + Kind: claircore.FileKindWhiteout, + }, }, }, }, @@ -240,10 +252,12 @@ func TestResolver(t *testing.T) { Environments: map[string][]*claircore.Environment{ "1": {{IntroducedIn: firstLayerHash}, {IntroducedIn: thirdLayerHash}}, }, - Files: map[string]claircore.File{ - secondLayerHash.String(): { // whiteout is sandwiched - Path: "a/path/to/some/file/site-packages/.wh.a_package", - Kind: claircore.FileKindWhiteout, + Files: map[string][]claircore.File{ + secondLayerHash.String(): { + { // whiteout is sandwiched + Path: "a/path/to/some/file/site-packages/.wh.a_package", + Kind: claircore.FileKindWhiteout, + }, }, }, },
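
For illustration only, below is a minimal standalone sketch of the mechanism described in the commit message: the RPM file scanner records RPM-owned paths (those matching filePatterns in rpm/native_db.go), and the resolver then drops language-ecosystem packages whose Filepath is one of those RPM-owned paths. The types, package names, and file paths here are hypothetical stand-ins rather than the claircore structs, and the real Resolver.Resolve additionally skips packages already attributed to the rhel-cpe-repository, so treat this as an approximation of the logic in the patch, not the implementation itself.

package main

import (
	"fmt"
	"regexp"
)

// Same pattern as filePatterns in rpm/native_db.go; the tested strings are
// absolute paths as stored in the RPM headers.
var filePatterns = regexp.MustCompile(`^.*/[^/]+\.jar$|^/usr/lib/python[23]\.[0-9]+/site-packages/[^/]+\.egg-info/PKG-INFO$`)

// Local stand-ins for claircore.File and claircore.Package (illustration only).
type file struct{ Kind, Path string }
type pkg struct{ Name, Filepath string }

func main() {
	// Paths reported by an RPM database; matches are recorded as relative
	// paths (name[1:]), mirroring Info.Load in the patch. The concrete paths
	// below are made up for the example.
	var rpmOwned []file
	for _, abs := range []string{
		"/usr/share/java/example-client.jar",
		"/usr/lib/python3.6/site-packages/example-1.0.0.egg-info/PKG-INFO",
		"/etc/passwd", // not of interest, filtered out by filePatterns
	} {
		if filePatterns.MatchString(abs) {
			rpmOwned = append(rpmOwned, file{Kind: "rpm", Path: abs[1:]})
		}
	}

	// Packages found by language scanners; the jar owned by an RPM is dropped,
	// roughly what Resolver.Resolve does post-coalesce.
	pkgs := []pkg{
		{Name: "example-client", Filepath: "usr/share/java/example-client.jar"},
		{Name: "eventstream", Filepath: "opt/couchbase/lib/cbas/repo/eventstream-1.0.1.jar"},
	}
	for _, p := range pkgs {
		owned := false
		for _, f := range rpmOwned {
			if f.Kind == "rpm" && f.Path == p.Filepath {
				owned = true
				break
			}
		}
		fmt.Printf("%s: keep=%v\n", p.Name, !owned)
	}
}

The design choice that makes this possible is visible throughout the diff: IndexReport.Files changes from map[string]File to map[string][]File keyed by layer digest, so multiple file scanners (whiteout and RPM) can each contribute files for the same layer, and each resolver consumes the slice independently.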