Skip to content
This repository has been archived by the owner on Nov 5, 2021. It is now read-only.

Commit

Permalink
Fix a goroutine leaking bug in external probe. (#584)
Browse files Browse the repository at this point in the history
Fix a bug where our process-wait goroutine will not exit until the probe start context is explicitly canceled. This bug was introduced when we added handling for clean exit (June 2020: CR 318986425).

Also, add a test that would have caught this bug.

PiperOrigin-RevId: 366547916
  • Loading branch information
manugarg authored Apr 5, 2021
1 parent 925bfed commit 1a77284
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 13 deletions.
38 changes: 25 additions & 13 deletions probes/external/external.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,28 @@ func substituteLabels(in string, labels map[string]string) (string, bool) {
return output, foundAll
}

type command interface {
Wait() error
}

// monitorCommand waits for the process to terminate and sets cmdRunning to
// false when that happens.
func (p *Probe) monitorCommand(startCtx context.Context, cmd command) error {
err := cmd.Wait()

// Spare logging error message if killed explicitly.
select {
case <-startCtx.Done():
return nil
default:
}

if exitErr, ok := err.(*exec.ExitError); ok {
return fmt.Errorf("external probe process died with the status: %s. Stderr: %s", exitErr.Error(), string(exitErr.Stderr))
}
return err
}

func (p *Probe) startCmdIfNotRunning(startCtx context.Context) error {
// Start external probe command if it's not running already. Note that here we
// are trusting the cmdRunning to be set correctly. It can be false for 3 reasons:
Expand Down Expand Up @@ -265,21 +287,11 @@ func (p *Probe) startCmdIfNotRunning(startCtx context.Context) error {
// This goroutine waits for the process to terminate and sets cmdRunning to
// false when that happens.
go func() {
err := cmd.Wait()
if err := p.monitorCommand(startCtx, cmd); err != nil {
p.l.Error(err.Error())
}
close(doneChan)
p.cmdRunning = false

// Spare logging error message if killed explicitly.
select {
case <-startCtx.Done():
return
}

if err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
p.l.Errorf("external probe process died with the status: %s. Stderr: %s", exitErr.Error(), string(exitErr.Stderr))
}
}
}()
go p.readProbeReplies(doneChan)
p.cmdRunning = true
Expand Down
91 changes: 91 additions & 0 deletions probes/external/external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"io"
"os"
"os/exec"
"reflect"
"strings"
"testing"
Expand Down Expand Up @@ -719,3 +721,92 @@ func TestCommandParsing(t *testing.T) {
t.Errorf("Got command args=%v, want command args=%v", p.cmdArgs, wantArgs)
}
}

type fakeCommand struct {
exitCtx context.Context
startCtx context.Context
waitErr error
}

func (fc *fakeCommand) Wait() error {
select {
case <-fc.exitCtx.Done():
case <-fc.startCtx.Done():
}
return fc.waitErr
}

func TestMonitorCommand(t *testing.T) {
tests := []struct {
desc string
waitErr error
finishCmd bool
cancelCtx bool
wantErr bool
wantStderr bool
}{
{
desc: "Command exit with no error",
finishCmd: true,
wantErr: false,
},
{
desc: "Cancel context, no error",
cancelCtx: true,
wantErr: false,
},
{
desc: "command exit with exit error",
finishCmd: true,
waitErr: &exec.ExitError{Stderr: []byte("exit-error exiting")},
wantErr: true,
wantStderr: true,
},
{
desc: "command exit with no exit error",
finishCmd: true,
waitErr: errors.New("some-error"),
wantErr: true,
wantStderr: false,
},
}

for _, test := range tests {
t.Run(test.desc, func(t *testing.T) {
exitCtx, exitFunc := context.WithCancel(context.Background())
startCtx, startCancelFunc := context.WithCancel(context.Background())
cmd := &fakeCommand{
exitCtx: exitCtx,
startCtx: startCtx,
waitErr: test.waitErr,
}

p := &Probe{}
errCh := make(chan error)
go func() {
errCh <- p.monitorCommand(startCtx, cmd)
}()

if test.finishCmd {
exitFunc()
}
if test.cancelCtx {
startCancelFunc()
}

err := <-errCh
if (err != nil) != test.wantErr {
t.Errorf("Got error: %v, want error?= %v", err, test.wantErr)
}

if err != nil {
if test.wantStderr && !strings.Contains(err.Error(), "Stderr") {
t.Errorf("Want std err: %v, got std err: %v", test.wantStderr, strings.Contains(err.Error(), "Stderr"))
}
}

exitFunc()
startCancelFunc()
})
}
}

0 comments on commit 1a77284

Please sign in to comment.