diff --git a/internal/backends/python/common_words.txt b/internal/backends/python/common_words.txt
new file mode 100644
index 00000000..4d6e43cd
--- /dev/null
+++ b/internal/backends/python/common_words.txt
@@ -0,0 +1,56 @@
+account
+app
+build
+cache
+command
+commands
+common
+config
+core
+custom
+daemon
+demo
+deploy
+dev
+edit
+editor
+example
+examples
+install
+job
+library
+local
+log
+mail
+mailer
+main
+menus
+mode
+model
+models
+public
+run
+sample
+samples
+schema
+server
+service
+services
+session
+settings
+setup
+tasks
+test
+testapp
+tester
+testing
+testproject
+tests
+tools
+tree
+unit
+user
+util
+utilities
+utils
+web
diff --git a/internal/backends/python/grab.go b/internal/backends/python/grab.go
index 610e5dee..fcf63328 100644
--- a/internal/backends/python/grab.go
+++ b/internal/backends/python/grab.go
@@ -2,6 +2,7 @@ package python
 
 import (
 	"context"
+	_ "embed"
 	"os"
 	"strings"
 
@@ -11,6 +12,13 @@ import (
 	"gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer"
 )
 
+var (
+	// commonWordsBytes is the raw content of common_words.txt, one word per line.
+	//
+	//go:embed common_words.txt
+	commonWordsBytes []byte
+)
+
 var importsQuery = `
 (module
   [(import_statement
@@ -309,13 +317,23 @@ func filterImports(ctx context.Context, foundPkgs map[string]bool, testPypiMap f
 	//nolint:ineffassign,wastedassign,staticcheck
 	span, ctx := tracer.StartSpanFromContext(ctx, "python.grab.filterImports")
 	defer span.Finish()
-	// filter out stdlib/python internal modules
+	// Build the set of common words from the embedded list. strings.Fields
+	// splits on any run of whitespace, so the file's trailing newline (or CRLF
+	// line endings) cannot add "" or "\r"-suffixed keys to the set.
+	commonWords := make(map[string]bool)
+	for _, word := range strings.Fields(string(commonWordsBytes)) {
+		commonWords[word] = true
+	}
+	// filter out stdlib/python internal modules, common words
 	for pkg := range foundPkgs {
 		// First path component
 		mod := strings.Split(pkg, ".")[0]
 		if internalModules[mod] {
 			delete(foundPkgs, pkg)
 		}
+		if commonWords[mod] {
+			delete(foundPkgs, pkg)
+		}
 	}
 
 	pkgs := map[string][]api.PkgName{}