Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix platform detection #673

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.22.2

require (
github.com/NVIDIA/go-gpuallocator v0.4.2
github.com/NVIDIA/go-nvlib v0.3.1
github.com/NVIDIA/go-nvlib v0.4.0
github.com/NVIDIA/go-nvml v0.12.0-6
github.com/NVIDIA/nvidia-container-toolkit v1.15.1-0.20240419094620-0aed9a16addf
github.com/fsnotify/fsnotify v1.7.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7
github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w=
github.com/NVIDIA/go-gpuallocator v0.4.2 h1:OSW55pQKXQaL2qN+NFGtvlNzasl8wSr6vDW+4bRI4bg=
github.com/NVIDIA/go-gpuallocator v0.4.2/go.mod h1:op8+/A79CnWBjdl/bsk2vR40Afj+EukIF4m8cE0YUsk=
github.com/NVIDIA/go-nvlib v0.3.1 h1:4xvcf/OHXPL2BYXx9Sj44FtoEPYsYNxUe+Dvmy9V6IE=
github.com/NVIDIA/go-nvlib v0.3.1/go.mod h1:87z49ULPr4GWPSGfSIp3taU4XENRYN/enIg88MzcL4k=
github.com/NVIDIA/go-nvlib v0.4.0 h1:dvuqjjSamBODFuxttPg4H/xtNVQRZOSlwFtuNKybcGI=
github.com/NVIDIA/go-nvlib v0.4.0/go.mod h1:87z49ULPr4GWPSGfSIp3taU4XENRYN/enIg88MzcL4k=
github.com/NVIDIA/go-nvml v0.12.0-6 h1:FJYc2KrpvX+VOC/8QQvMiQMmZ/nPMRpdJO/Ik4xfcr0=
github.com/NVIDIA/go-nvml v0.12.0-6/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
github.com/NVIDIA/nvidia-container-toolkit v1.15.1-0.20240419094620-0aed9a16addf h1:6eKsIVTytQ34X4rFoPjcW+JbJ8XvYH3ITevLED+enD8=
Expand Down
36 changes: 10 additions & 26 deletions internal/plugin/manager/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ func New(opts ...Option) (Interface, error) {
}

if m.infolib == nil {
m.infolib = info.New()
m.infolib = info.New(
info.WithNvmlLib(m.nvmllib),
)
}

mode, err := m.resolveMode()
Expand Down Expand Up @@ -95,20 +97,13 @@ func New(opts ...Option) (Interface, error) {
}

func (m *manager) resolveMode() (string, error) {
// logWithReason logs the output of the has* / is* checks from the info.Interface
logWithReason := func(f func() (bool, string), tag string) bool {
is, reason := f()
if !is {
tag = "non-" + tag
}
klog.Infof("Detected %v platform: %v", tag, reason)
return is
}

hasNVML := logWithReason(m.infolib.HasNvml, "NVML")
isTegra := logWithReason(m.infolib.IsTegraSystem, "Tegra")

if !hasNVML && !isTegra {
platform := m.infolib.ResolvePlatform()
switch platform {
case info.PlatformNVML, info.PlatformWSL:
return "nvml", nil
case info.PlatformTegra:
return "tegra", nil
default:
klog.Error("Incompatible platform detected")
klog.Error("If this is a GPU node, did you configure the NVIDIA Container Toolkit?")
klog.Error("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
Expand All @@ -119,15 +114,4 @@ func (m *manager) resolveMode() (string, error) {
}
return "null", nil
}

// The NVIDIA container stack does not yet support the use of integrated AND discrete GPUs on the same node.
if isTegra {
if hasNVML {
klog.Warning("Disabling Tegra-based resources on NVML system")
return "nvml", nil
}
return "tegra", nil
}

return "nvml", nil
}
32 changes: 6 additions & 26 deletions internal/resource/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ func WithConfig(manager Manager, config *spec.Config) Manager {

// getManager returns the resource manager depending on the system configuration.
func getManager(mode string) Manager {

resolved := resolveMode(mode)
switch resolved {
case "nvml":
Expand All @@ -62,34 +61,15 @@ func resolveMode(mode string) string {
if mode != "" && mode != "auto" {
return mode
}

// logWithReason logs the output of the has* / is* checks from the info.Interface
logWithReason := func(f func() (bool, string), tag string) bool {
is, reason := f()
if !is {
tag = "non-" + tag
}
klog.Infof("Detected %v platform: %v", tag, reason)
return is
}

infolib := info.New()

hasNVML := logWithReason(infolib.HasNvml, "NVML")
isTegra := logWithReason(infolib.IsTegraSystem, "Tegra")

// The NVIDIA container stack does not yet support the use of integrated AND discrete GPUs on the same node.
if hasNVML && isTegra {
klog.Warning("Disabling Tegra-based resources on NVML system")
isTegra = false
}

if hasNVML {
platform := infolib.ResolvePlatform()
switch platform {
case info.PlatformNVML, info.PlatformWSL:
return "nvml"
}

if isTegra {
case info.PlatformTegra:
return "tegra"
default:
return "unknown"
}
return mode
}
41 changes: 41 additions & 0 deletions vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/api.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/builder.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

102 changes: 0 additions & 102 deletions vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/info.go

This file was deleted.

28 changes: 28 additions & 0 deletions vendor/github.com/NVIDIA/go-nvlib/pkg/nvlib/info/logger.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading