@@ -15,6 +15,10 @@ import (
1515 "testing"
1616 "time"
1717
18+ "github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
19+ "github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
20+ "github.com/NVIDIA/go-nvml/pkg/nvml"
21+ "github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100"
1822 "github.com/stretchr/testify/require"
1923
2024 spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
@@ -420,6 +424,89 @@ func TestFailOnNVMLInitError(t *testing.T) {
420424 }
421425}
422426
427+ // TODO: This should be extended to a more representative test.
428+ func TestGFDLabellers (t * testing.T ) {
429+ nvmllib := dgxa100 .New ()
430+
431+ for _ , d := range nvmllib .Devices {
432+ // TODO: This is not implemented in the mock.
433+ (d .(* dgxa100.Device )).GetGpuFabricInfoFunc = func () (nvml.GpuFabricInfo , nvml.Return ) {
434+ return nvml.GpuFabricInfo {}, nvml .ERROR_NOT_SUPPORTED
435+ }
436+ }
437+
438+ // Force one of the devices to have errors when enumerating the device.
439+ workingDevices := nvmllib .DeviceGetHandleByIndexFunc
440+ nvmllib .DeviceGetHandleByIndexFunc = func (n int ) (nvml.Device , nvml.Return ) {
441+ if n == 0 {
442+ return nil , nvml .ERROR_INVALID_ARGUMENT
443+ }
444+ return workingDevices (n )
445+ }
446+
447+ devicelib := device .New (nvmllib ,
448+ device .WithIgnoreVisitDevicesErrors (true ),
449+ )
450+
451+ infolib := info .New (
452+ info .WithNvmlLib (nvmllib ),
453+ info .WithDeviceLib (devicelib ),
454+ )
455+
456+ cfg := & Config {}
457+ config := & spec.Config {
458+ Flags : spec.Flags {
459+ CommandLineFlags : spec.CommandLineFlags {
460+ DeviceDiscoveryStrategy : ptr ("nvml" ),
461+ FailOnInitError : ptr (true ),
462+ MigStrategy : ptr ("none" ),
463+ GFD : & spec.GFDCommandLineFlags {
464+ MachineTypeFile : ptr ("" ),
465+ OutputFile : ptr ("" ),
466+ },
467+ },
468+ },
469+ }
470+ d , err := newGFDRunner (cfg , infolib , nvmllib , devicelib , config )
471+ require .NoError (t , err )
472+
473+ loopLabelers , err := lm .NewLabelers (d .manager , d .vgpu , d .config )
474+ require .NoError (t , err )
475+
476+ labels , err := loopLabelers .Labels ()
477+ require .NoError (t , err )
478+
479+ expectedLabels := map [string ]string {
480+ "nvidia.com/cuda.driver-version.full" : "550.54.15" ,
481+ "nvidia.com/cuda.driver-version.major" : "550" ,
482+ "nvidia.com/cuda.driver-version.minor" : "54" ,
483+ "nvidia.com/cuda.driver-version.revision" : "15" ,
484+ "nvidia.com/cuda.driver.major" : "550" ,
485+ "nvidia.com/cuda.driver.minor" : "54" ,
486+ "nvidia.com/cuda.driver.rev" : "15" ,
487+ "nvidia.com/cuda.runtime-version.full" : "12.4" ,
488+ "nvidia.com/cuda.runtime-version.major" : "12" ,
489+ "nvidia.com/cuda.runtime-version.minor" : "4" ,
490+ "nvidia.com/cuda.runtime.major" : "12" ,
491+ "nvidia.com/cuda.runtime.minor" : "4" ,
492+ "nvidia.com/gpu.compute.major" : "8" ,
493+ "nvidia.com/gpu.compute.minor" : "0" ,
494+ "nvidia.com/gpu.count" : "7" ,
495+ "nvidia.com/gpu.family" : "ampere" ,
496+ "nvidia.com/gpu.machine" : "unknown" ,
497+ "nvidia.com/gpu.memory" : "40960" ,
498+ "nvidia.com/gpu.mode" : "unknown" ,
499+ "nvidia.com/gpu.product" : "Mock-NVIDIA-A100-SXM4-40GB" ,
500+ "nvidia.com/gpu.replicas" : "1" ,
501+ "nvidia.com/gpu.sharing-strategy" : "none" ,
502+ "nvidia.com/mig.capable" : "true" ,
503+ "nvidia.com/mps.capable" : "false" ,
504+ }
505+
506+ require .EqualValues (t , expectedLabels , (map [string ]string )(labels ))
507+
508+ }
509+
423510func buildLabelMapFromOutput (output []byte ) (map [string ]string , error ) {
424511 labels := make (map [string ]string )
425512
0 commit comments