@@ -15,6 +15,8 @@ import (
1515 "testing"
1616 "time"
1717
18+ "github.com/NVIDIA/go-nvml/pkg/nvml"
19+ "github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100"
1820 "github.com/stretchr/testify/require"
1921
2022 spec "github.com/NVIDIA/k8s-device-plugin/api/config/v1"
@@ -420,6 +422,80 @@ func TestFailOnNVMLInitError(t *testing.T) {
420422 }
421423}
422424
425+ // TODO: This should be extended to a more representative test.
426+ func TestGFDLabellers (t * testing.T ) {
427+ nvmllib := dgxa100 .New ()
428+
429+ for _ , d := range nvmllib .Devices {
430+ // TODO: This is not implemented in the mock.
431+ (d .(* dgxa100.Device )).GetGpuFabricInfoFunc = func () (nvml.GpuFabricInfo , nvml.Return ) {
432+ return nvml.GpuFabricInfo {}, nvml .ERROR_NOT_SUPPORTED
433+ }
434+ }
435+
436+ // Force one of the devices to have errors when enumerating the device.
437+ workingDevices := nvmllib .DeviceGetHandleByIndexFunc
438+ nvmllib .DeviceGetHandleByIndexFunc = func (n int ) (nvml.Device , nvml.Return ) {
439+ if n == 0 {
440+ return nil , nvml .ERROR_INVALID_ARGUMENT
441+ }
442+ return workingDevices (n )
443+ }
444+
445+ cfg := & Config {}
446+ config := & spec.Config {
447+ Flags : spec.Flags {
448+ CommandLineFlags : spec.CommandLineFlags {
449+ DeviceDiscoveryStrategy : ptr ("nvml" ),
450+ FailOnInitError : ptr (true ),
451+ MigStrategy : ptr ("none" ),
452+ GFD : & spec.GFDCommandLineFlags {
453+ MachineTypeFile : ptr ("" ),
454+ OutputFile : ptr ("" ),
455+ },
456+ },
457+ },
458+ }
459+ d , err := newGFDRunner (cfg , nvmllib , config )
460+ require .NoError (t , err )
461+
462+ loopLabelers , err := lm .NewLabelers (d .manager , d .vgpu , d .config )
463+ require .NoError (t , err )
464+
465+ labels , err := loopLabelers .Labels ()
466+ require .NoError (t , err )
467+
468+ expectedLabels := map [string ]string {
469+ "nvidia.com/cuda.driver-version.full" : "550.54.15" ,
470+ "nvidia.com/cuda.driver-version.major" : "550" ,
471+ "nvidia.com/cuda.driver-version.minor" : "54" ,
472+ "nvidia.com/cuda.driver-version.revision" : "15" ,
473+ "nvidia.com/cuda.driver.major" : "550" ,
474+ "nvidia.com/cuda.driver.minor" : "54" ,
475+ "nvidia.com/cuda.driver.rev" : "15" ,
476+ "nvidia.com/cuda.runtime-version.full" : "12.4" ,
477+ "nvidia.com/cuda.runtime-version.major" : "12" ,
478+ "nvidia.com/cuda.runtime-version.minor" : "4" ,
479+ "nvidia.com/cuda.runtime.major" : "12" ,
480+ "nvidia.com/cuda.runtime.minor" : "4" ,
481+ "nvidia.com/gpu.compute.major" : "8" ,
482+ "nvidia.com/gpu.compute.minor" : "0" ,
483+ "nvidia.com/gpu.count" : "7" ,
484+ "nvidia.com/gpu.family" : "ampere" ,
485+ "nvidia.com/gpu.machine" : "unknown" ,
486+ "nvidia.com/gpu.memory" : "40960" ,
487+ "nvidia.com/gpu.mode" : "unknown" ,
488+ "nvidia.com/gpu.product" : "Mock-NVIDIA-A100-SXM4-40GB" ,
489+ "nvidia.com/gpu.replicas" : "1" ,
490+ "nvidia.com/gpu.sharing-strategy" : "none" ,
491+ "nvidia.com/mig.capable" : "true" ,
492+ "nvidia.com/mps.capable" : "false" ,
493+ }
494+
495+ require .EqualValues (t , expectedLabels , (map [string ]string )(labels ))
496+
497+ }
498+
423499func buildLabelMapFromOutput (output []byte ) (map [string ]string , error ) {
424500 labels := make (map [string ]string )
425501
0 commit comments