diff --git a/pkg/languages/python/.snapshots/TestFlow--flow.yml b/pkg/languages/python/.snapshots/TestFlow--flow.yml index 9afe42bcb..420bcad57 100644 --- a/pkg/languages/python/.snapshots/TestFlow--flow.yml +++ b/pkg/languages/python/.snapshots/TestFlow--flow.yml @@ -55,4 +55,32 @@ high: parent_line_number: 7 fingerprint: 22039dd750c8bd604904ee9f5bc626f0_1 old_fingerprint: 22039dd750c8bd604904ee9f5bc626f0_1 + - rule: + cwe_ids: + - "42" + id: flow_test + title: Test dataflow and variables + description: Test dataflow and variables + documentation_url: "" + line_number: 13 + full_filename: flow.py + filename: flow.py + source: + location: + start: 13 + end: 13 + column: + start: 5 + end: 19 + sink: + location: + start: 13 + end: 13 + column: + start: 5 + end: 19 + content: "" + parent_line_number: 13 + fingerprint: 22039dd750c8bd604904ee9f5bc626f0_2 + old_fingerprint: 22039dd750c8bd604904ee9f5bc626f0_2 diff --git a/pkg/languages/python/analyzer/analyzer.go b/pkg/languages/python/analyzer/analyzer.go index 21253c7ee..49eca350d 100644 --- a/pkg/languages/python/analyzer/analyzer.go +++ b/pkg/languages/python/analyzer/analyzer.go @@ -9,6 +9,13 @@ import ( "github.com/bearer/bearer/pkg/scanner/language" ) +var reflexiveMethods = []string{ + "decode", + "encode", + "format", + "replace", +} + type analyzer struct { builder *tree.Builder scope *language.Scope @@ -23,7 +30,7 @@ func New(builder *tree.Builder) language.Analyzer { func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) error { switch node.Type() { - case "class_definition", "block", "function_definition": + case "class_definition", "function_definition": return analyzer.withScope(language.NewScope(analyzer.scope), func() error { return visitChildren() }) @@ -100,10 +107,16 @@ func (analyzer *analyzer) analyzeAssignment(node *sitter.Node, visitChildren fun // foo.bar(a, b) func (analyzer *analyzer) analyzeCall(node *sitter.Node, visitChildren func() error) error { - if receiver := node.ChildByFieldName("function"); receiver != nil { - analyzer.lookupVariable(receiver) - - analyzer.builder.Dataflow(node, receiver) + if function := node.ChildByFieldName("function"); function != nil { + object := function.ChildByFieldName("object") + analyzer.lookupVariable(object) + + if function.Type() == "attribute" { + attribute := function.ChildByFieldName("attribute") + if attribute.Type() == "identifier" && slices.Contains(reflexiveMethods, analyzer.builder.ContentFor(attribute)) { + analyzer.builder.Dataflow(node, object) + } + } } if argumentsNode := node.ChildByFieldName("arguments"); argumentsNode != nil { diff --git a/pkg/languages/python/testdata/flow/flow.py b/pkg/languages/python/testdata/flow/flow.py index c196f15b8..0063476c1 100644 --- a/pkg/languages/python/testdata/flow/flow.py +++ b/pkg/languages/python/testdata/flow/flow.py @@ -1,10 +1,22 @@ def with_statement(): with source() as value, other: cursor_sink(value) - + def for_statement(): for value in source(): result_sink(value) cursor_sink(value) # no match +def reflexive_methods(): + s = source() + x = s.format("hello") + result_sink(x) + cursor_sink(x) # no match + +def non_reflexive_methods(): + s = source() + x = s.my_method("hello") + result_sink(x) # no match + cursor_sink(x) # no match + cursor_sink(value) # no match