From b7f82bc1b109cf56ea7b5c902cdf117f1df0d640 Mon Sep 17 00:00:00 2001 From: Brian Gibbon Date: Mon, 29 Apr 2024 19:18:02 +0000 Subject: [PATCH 1/5] Return error if there are CheckErrors or ReportErrors --- Makefile | 4 +- pipeline/endpoints/servicecontrol.go | 61 ++++++++- pipeline/endpoints/servicecontrol_test.go | 157 ++++++++++++++++++++-- 3 files changed, 204 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 2d05dc8..386665e 100644 --- a/Makefile +++ b/Makefile @@ -63,11 +63,11 @@ test: .GOPATH/.ok # $Q go test $(if $V,-v) -i -race $(allpackages) # install -race libs to speed up next run ifndef CI $Q go vet $(allpackages) - $Q GODEBUG=cgocheck=2 go test -race $(allpackages) + $Q GOEXPERIMENT=cgocheck2 go test -race $(allpackages) else $Q ( go vet $(allpackages); echo $$? ) | \ tee .GOPATH/test/vet.txt | sed '$$ d'; exit $$(tail -1 .GOPATH/test/vet.txt) - $Q ( GODEBUG=cgocheck=2 go test -v -race $(allpackages); echo $$? ) | \ + $Q ( GOEXPERIMENT=cgocheck2 go test -v -race $(allpackages); echo $$? ) | \ tee .GOPATH/test/output.txt | sed '$$ d'; exit $$(tail -1 .GOPATH/test/output.txt) endif diff --git a/pipeline/endpoints/servicecontrol.go b/pipeline/endpoints/servicecontrol.go index c8e7326..dec1839 100644 --- a/pipeline/endpoints/servicecontrol.go +++ b/pipeline/endpoints/servicecontrol.go @@ -16,19 +16,19 @@ package endpoints import ( "context" + "errors" "fmt" "net" "time" - "github.com/GoogleCloudPlatform/ubbagent/metrics" - "github.com/GoogleCloudPlatform/ubbagent/clock" + "github.com/GoogleCloudPlatform/ubbagent/metrics" "github.com/GoogleCloudPlatform/ubbagent/pipeline" + "github.com/GoogleCloudPlatform/ubbagent/util" "github.com/golang/glog" "golang.org/x/oauth2/google" "google.golang.org/api/googleapi" "google.golang.org/api/servicecontrol/v1" - "github.com/GoogleCloudPlatform/ubbagent/util" ) const ( @@ -49,6 +49,11 @@ type ServiceControlEndpoint struct { clock clock.Clock } +type checkError struct { + err error + transient bool +} + // NewServiceControlEndpoint creates a new ServiceControlEndpoint. func NewServiceControlEndpoint(name, serviceName, agentId string, consumerId string, jsonKey []byte) (*ServiceControlEndpoint, error) { config, err := google.JWTConfigFromJSON(jsonKey, servicecontrol.ServicecontrolScope) @@ -98,19 +103,34 @@ func (ep *ServiceControlEndpoint) Send(report pipeline.EndpointReport) error { checkReq := &servicecontrol.CheckRequest{ Operation: &opNoLabels, } - _, err := ep.service.Services.Check(ep.serviceName, checkReq).Do() + checkResp, err := ep.service.Services.Check(ep.serviceName, checkReq).Do() if err != nil && !googleapi.IsNotModified(err) { return err } + + if len(checkResp.CheckErrors) > 0 { + return checkErrorsToError(checkResp.CheckErrors) + } + ep.nextCheck = ep.clock.Now().Add(checkCacheTimeout) } - _, err := ep.service.Services.Report(ep.serviceName, req).Do() + resp, err := ep.service.Services.Report(ep.serviceName, req).Do() if err != nil && !googleapi.IsNotModified(err) { return err } + + // This will retry reporting all operations. + // However, identical operations are de-duped for billing + if len(resp.ReportErrors) > 0 { + var errs []error + for _, reportErr := range resp.ReportErrors { + errs = append(errs, reportErrorToError(reportErr)) + } + return errors.Join(errs...) + } + glog.V(2).Infoln("ServiceControlEndpoint:Send(): success") - // TODO(volkman): Handle potential per-operation errors in response body return nil } @@ -175,9 +195,38 @@ func (ep *ServiceControlEndpoint) IsTransient(err error) bool { case net.Error: // Return true if this error is considered temporary or a timeout. return v.Temporary() || v.Timeout() + case *checkError: + return v.transient default: // Some non-http error (perhaps a connection refused or timeout?) // We'll retry. return true } } + +func checkErrorsToError(checkErrors []*servicecontrol.CheckError) error { + var errs []error + var transient = true + for _, checkError := range checkErrors { + fmt.Println("Check error", checkError.Code) + switch checkError.Code { + // These errors indicate customer disabling billing and + // is not retriable. See: https://cloud.google.com/marketplace/docs/partners/integrated-saas/backend-integration#for_usage-based_pricing_reporting_usage_to_google + case "BILLING_DISABLED", "SERVICE_NOT_ACTIVATED", "PROJECT_DELETED": + transient = false + fmt.Println("Transient") + } + bytes, _ := checkError.MarshalJSON() + errs = append(errs, errors.New(string(bytes))) + } + return &checkError{err: errors.Join(errs...), transient: transient} +} + +func (ce checkError) Error() string { + return ce.err.Error() +} + +func reportErrorToError(reportError *servicecontrol.ReportError) error { + bytes, _ := reportError.MarshalJSON() + return errors.New(string(bytes)) +} diff --git a/pipeline/endpoints/servicecontrol_test.go b/pipeline/endpoints/servicecontrol_test.go index 408d247..35cc9d3 100644 --- a/pipeline/endpoints/servicecontrol_test.go +++ b/pipeline/endpoints/servicecontrol_test.go @@ -60,8 +60,40 @@ func (e mockNetError) Timeout() bool { func (h *recordingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { h.req = r + + var err error + h.body, err = ioutil.ReadAll(r.Body) + if err != nil { + panic(err) + } + + var respJson []byte if strings.Contains(r.RequestURI, ":check") { h.checkCount++ + req := &servicecontrol.CheckRequest{} + err := json.Unmarshal(h.body, req) + if err != nil { + h.t.Fatalf("Unable to parse check request %+v", err) + } + + resp := &servicecontrol.CheckResponse{} + + if req.Operation.OperationId == "billing-disabled" { + resp.CheckErrors = []*servicecontrol.CheckError{{ + Code: "BILLING_DISABLED", + }} + } + + if req.Operation.OperationId == "check-unknown-error" { + resp.CheckErrors = []*servicecontrol.CheckError{{ + Code: "UNKNOWN", + }} + } + + respJson, err = resp.MarshalJSON() + if err != nil { + panic(err) + } } if strings.Contains(r.RequestURI, ":report") { @@ -69,18 +101,28 @@ func (h *recordingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if h.checkCount == 0 { h.t.Fatalf("Check should be called before Report") } - } - var err error - h.body, err = ioutil.ReadAll(r.Body) - if err != nil { - panic(err) - } - resp := &servicecontrol.ReportResponse{} - respJson, err := resp.MarshalJSON() - if err != nil { - panic(err) + req := &servicecontrol.ReportRequest{} + err := json.Unmarshal(h.body, req) + if err != nil { + h.t.Fatalf("Unable to parse report request %+v", err) + } + + resp := &servicecontrol.ReportResponse{} + if req.Operations[0].OperationId == "report-error" { + resp.ReportErrors = []*servicecontrol.ReportError{{ + Status: &servicecontrol.Status{ + Message: "Unknown report error", + }, + }} + } + + respJson, err = resp.MarshalJSON() + if err != nil { + panic(err) + } } + w.Write(respJson) } @@ -194,6 +236,68 @@ func TestServiceControlEndpoint(t *testing.T) { } }) + t.Run("Check error BILLING_DISABLED returns non-retriable error", func(t *testing.T) { + ep.nextCheck = time.Now().Add(time.Minute * -1) + + // Test a single report write + report, err := ep.BuildReport(metrics.StampedMetricReport{ + Id: "billing-disabled", + MetricReport: metrics.MetricReport{ + Name: "int-metric1", + StartTime: time.Unix(0, 0), + EndTime: time.Unix(1, 0), + Value: metrics.MetricValue{ + Int64Value: util.NewInt64(10), + }, + }, + }) + if err != nil { + t.Fatalf("error building report: %+v", err) + } + + err = ep.Send(report) + if err == nil { + t.Fatalf("expected error sending report") + } + + checkErr := err.(*checkError) + if checkErr.transient { + t.Fatalf("expected billing disabled to not be a transient error") + } + + }) + + t.Run("Unknown check error returns retriable error", func(t *testing.T) { + ep.nextCheck = time.Now().Add(time.Minute * -1) + + // Test a single report write + report, err := ep.BuildReport(metrics.StampedMetricReport{ + Id: "check-unknown-error", + MetricReport: metrics.MetricReport{ + Name: "int-metric1", + StartTime: time.Unix(0, 0), + EndTime: time.Unix(1, 0), + Value: metrics.MetricValue{ + Int64Value: util.NewInt64(10), + }, + }, + }) + if err != nil { + t.Fatalf("error building report: %+v", err) + } + + err = ep.Send(report) + if err == nil { + t.Fatalf("expected error sending report") + } + + checkErr := err.(*checkError) + if !checkErr.transient { + t.Fatalf("expected transient error") + } + + }) + t.Run("Sent contents are correct", func(t *testing.T) { // Test a single report write report1, err := ep.BuildReport(metrics.StampedMetricReport{ @@ -268,6 +372,37 @@ func TestServiceControlEndpoint(t *testing.T) { } }) + t.Run("ReportError returns transient error", func(t *testing.T) { + // Test a single report write + report, err := ep.BuildReport(metrics.StampedMetricReport{ + Id: "report-error", + MetricReport: metrics.MetricReport{ + Name: "int-metric1", + StartTime: time.Unix(0, 0), + EndTime: time.Unix(1, 0), + Value: metrics.MetricValue{ + Int64Value: util.NewInt64(10), + }, + }, + }) + if err != nil { + t.Fatalf("error building report: %+v", err) + } + + err = ep.Send(report) + if err == nil { + t.Fatalf("expected error sending report") + } + + if !ep.IsTransient(err) { + t.Fatalf("expected transient error") + } + + if !strings.Contains(err.Error(), "Unknown report error") { + t.Fatalf("expected unknown report error") + } + }) + t.Run("IsTransient tests", func(t *testing.T) { cases := []struct { err error @@ -285,6 +420,8 @@ func TestServiceControlEndpoint(t *testing.T) { {mockNetError{temporary: true, timeout: false}, true}, {mockNetError{temporary: false, timeout: true}, true}, {mockNetError{temporary: true, timeout: true}, true}, + {&checkError{err: errors.New("foo"), transient: true}, true}, + {&checkError{err: errors.New("foo"), transient: false}, false}, } for _, c := range cases { if want, got := c.transient, ep.IsTransient(c.err); want != got { From 904487bf26cc73979263d5537bf205f48c421bc8 Mon Sep 17 00:00:00 2001 From: Brian Gibbon Date: Mon, 29 Apr 2024 20:06:51 +0000 Subject: [PATCH 2/5] Update go version for cloud build --- cloudbuild.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudbuild.Dockerfile b/cloudbuild.Dockerfile index e17e806..e42b5f8 100644 --- a/cloudbuild.Dockerfile +++ b/cloudbuild.Dockerfile @@ -1,5 +1,5 @@ # The image has the environment setup for running builds and tests # on Google Cloud Build. -FROM gcr.io/cloud-builders/go:debian-1.20 +FROM gcr.io/cloud-builders/go:debian-1.21 RUN apt-get update \ && apt-get install -y --no-install-recommends python3-dev From e6e61b993e8014359af1805e1f0ece5e0440679a Mon Sep 17 00:00:00 2001 From: Brian Gibbon Date: Mon, 29 Apr 2024 22:21:42 +0000 Subject: [PATCH 3/5] Upgrade go version for bazel --- WORKSPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WORKSPACE b/WORKSPACE index 596075f..53deeb4 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -21,7 +21,7 @@ http_archive( load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_dependencies") -go_register_toolchains(version = "1.19.9") +go_register_toolchains(version = "1.20.5") load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies", "go_repository") load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") From d30949ec100d9b94a2f85a9d8ba395da973277ac Mon Sep 17 00:00:00 2001 From: Brian Gibbon Date: Mon, 29 Apr 2024 23:22:35 +0000 Subject: [PATCH 4/5] Fix linker error on c++ agent --- sdk/cpp/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/cpp/BUILD.bazel b/sdk/cpp/BUILD.bazel index c9825fd..0b2c620 100644 --- a/sdk/cpp/BUILD.bazel +++ b/sdk/cpp/BUILD.bazel @@ -25,6 +25,7 @@ cc_library( cc_test( name = "agent_test", srcs = ["agent_test.cc"], + linkopts = ["-lresolv"], deps = [ ":agent", ":api.cc", From b9b261492a74be5d8a2b5766fee89289117c0503 Mon Sep 17 00:00:00 2001 From: Brian Gibbon Date: Tue, 30 Apr 2024 20:30:54 +0000 Subject: [PATCH 5/5] Remove debugging print statements --- pipeline/endpoints/servicecontrol.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipeline/endpoints/servicecontrol.go b/pipeline/endpoints/servicecontrol.go index dec1839..53b1a3b 100644 --- a/pipeline/endpoints/servicecontrol.go +++ b/pipeline/endpoints/servicecontrol.go @@ -208,13 +208,11 @@ func checkErrorsToError(checkErrors []*servicecontrol.CheckError) error { var errs []error var transient = true for _, checkError := range checkErrors { - fmt.Println("Check error", checkError.Code) switch checkError.Code { // These errors indicate customer disabling billing and // is not retriable. See: https://cloud.google.com/marketplace/docs/partners/integrated-saas/backend-integration#for_usage-based_pricing_reporting_usage_to_google case "BILLING_DISABLED", "SERVICE_NOT_ACTIVATED", "PROJECT_DELETED": transient = false - fmt.Println("Transient") } bytes, _ := checkError.MarshalJSON() errs = append(errs, errors.New(string(bytes)))