Skip to content

Commit fda9cea

Browse files
committed
Don't detect local GPU if $DS_IGNORE_CUDA_DETECTION is set
Make this consistent across all ops: for cross-compilation we should not check the local GPU version.
1 parent b6346bf commit fda9cea

File tree

7 files changed

+85
-75
lines changed

7 files changed

+85
-75
lines changed

op_builder/fp_quantizer.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
# DeepSpeed Team
55

6+
import os
67
try:
78
from packaging import version as pkg_version
89
except ImportError:
@@ -31,19 +32,20 @@ def is_compatible(self, verbose=False):
3132
return False
3233

3334
cuda_okay = True
34-
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
35-
sys_cuda_major, _ = installed_cuda_version()
36-
torch_cuda_major = int(torch.version.cuda.split('.')[0])
37-
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
38-
if cuda_capability < 8:
39-
if verbose:
40-
self.warning("NVIDIA Inference is only supported on Ampere and newer architectures")
41-
cuda_okay = False
42-
if cuda_capability >= 8:
43-
if torch_cuda_major < 11 or sys_cuda_major < 11:
35+
if not os.environ.get("DS_IGNORE_CUDA_DETECTION"):
36+
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
37+
sys_cuda_major, _ = installed_cuda_version()
38+
torch_cuda_major = int(torch.version.cuda.split('.')[0])
39+
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
40+
if cuda_capability < 8:
4441
if verbose:
45-
self.warning("On Ampere and higher architectures please use CUDA 11+")
42+
self.warning("NVIDIA Inference is only supported on Ampere and newer architectures")
4643
cuda_okay = False
44+
if cuda_capability >= 8:
45+
if torch_cuda_major < 11 or sys_cuda_major < 11:
46+
if verbose:
47+
self.warning("On Ampere and higher architectures please use CUDA 11+")
48+
cuda_okay = False
4749

4850
try:
4951
import triton

op_builder/inference_core_ops.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,20 @@ def is_compatible(self, verbose=False):
2828
return False
2929

3030
cuda_okay = True
31-
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
32-
sys_cuda_major, _ = installed_cuda_version()
33-
torch_cuda_major = int(torch.version.cuda.split('.')[0])
34-
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
35-
if cuda_capability < 6:
36-
if verbose:
37-
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
38-
cuda_okay = False
39-
if cuda_capability >= 8:
40-
if torch_cuda_major < 11 or sys_cuda_major < 11:
31+
if not os.environ.get("DS_IGNORE_CUDA_DETECTION"):
32+
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
33+
sys_cuda_major, _ = installed_cuda_version()
34+
torch_cuda_major = int(torch.version.cuda.split('.')[0])
35+
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
36+
if cuda_capability < 6:
4137
if verbose:
42-
self.warning("On Ampere and higher architectures please use CUDA 11+")
38+
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
4339
cuda_okay = False
40+
if cuda_capability >= 8:
41+
if torch_cuda_major < 11 or sys_cuda_major < 11:
42+
if verbose:
43+
self.warning("On Ampere and higher architectures please use CUDA 11+")
44+
cuda_okay = False
4445
return super().is_compatible(verbose) and cuda_okay
4546

4647
def filter_ccs(self, ccs):

op_builder/inference_cutlass_builder.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,20 @@ def is_compatible(self, verbose=False):
2727
return False
2828

2929
cuda_okay = True
30-
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
31-
sys_cuda_major, _ = installed_cuda_version()
32-
torch_cuda_major = int(torch.version.cuda.split('.')[0])
33-
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
34-
if cuda_capability < 6:
35-
if verbose:
36-
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
37-
cuda_okay = False
38-
if cuda_capability >= 8:
39-
if torch_cuda_major < 11 or sys_cuda_major < 11:
30+
if not os.environ.get("DS_IGNORE_CUDA_DETECTION"):
31+
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
32+
sys_cuda_major, _ = installed_cuda_version()
33+
torch_cuda_major = int(torch.version.cuda.split('.')[0])
34+
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
35+
if cuda_capability < 6:
4036
if verbose:
41-
self.warning("On Ampere and higher architectures please use CUDA 11+")
37+
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
4238
cuda_okay = False
39+
if cuda_capability >= 8:
40+
if torch_cuda_major < 11 or sys_cuda_major < 11:
41+
if verbose:
42+
self.warning("On Ampere and higher architectures please use CUDA 11+")
43+
cuda_okay = False
4344
return super().is_compatible(verbose) and cuda_okay
4445

4546
def filter_ccs(self, ccs):

op_builder/ragged_ops.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,20 @@ def is_compatible(self, verbose=False):
2828
return False
2929

3030
cuda_okay = True
31-
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
32-
sys_cuda_major, _ = installed_cuda_version()
33-
torch_cuda_major = int(torch.version.cuda.split('.')[0])
34-
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
35-
if cuda_capability < 6:
36-
if verbose:
37-
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
38-
cuda_okay = False
39-
if cuda_capability >= 8:
40-
if torch_cuda_major < 11 or sys_cuda_major < 11:
31+
if not os.environ.get("DS_IGNORE_CUDA_DETECTION"):
32+
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
33+
sys_cuda_major, _ = installed_cuda_version()
34+
torch_cuda_major = int(torch.version.cuda.split('.')[0])
35+
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
36+
if cuda_capability < 6:
4137
if verbose:
42-
self.warning("On Ampere and higher architectures please use CUDA 11+")
38+
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
4339
cuda_okay = False
40+
if cuda_capability >= 8:
41+
if torch_cuda_major < 11 or sys_cuda_major < 11:
42+
if verbose:
43+
self.warning("On Ampere and higher architectures please use CUDA 11+")
44+
cuda_okay = False
4445
return super().is_compatible(verbose) and cuda_okay
4546

4647
def filter_ccs(self, ccs):

op_builder/ragged_utils.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,20 @@ def is_compatible(self, verbose=False):
2828
return False
2929

3030
cuda_okay = True
31-
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
32-
sys_cuda_major, _ = installed_cuda_version()
33-
torch_cuda_major = int(torch.version.cuda.split('.')[0])
34-
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
35-
if cuda_capability < 6:
36-
if verbose:
37-
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
38-
cuda_okay = False
39-
if cuda_capability >= 8:
40-
if torch_cuda_major < 11 or sys_cuda_major < 11:
31+
if not os.environ.get("DS_IGNORE_CUDA_DETECTION"):
32+
if not self.is_rocm_pytorch() and torch.cuda.is_available(): #ignore-cuda
33+
sys_cuda_major, _ = installed_cuda_version()
34+
torch_cuda_major = int(torch.version.cuda.split('.')[0])
35+
cuda_capability = torch.cuda.get_device_properties(0).major #ignore-cuda
36+
if cuda_capability < 6:
4137
if verbose:
42-
self.warning("On Ampere and higher architectures please use CUDA 11+")
38+
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
4339
cuda_okay = False
40+
if cuda_capability >= 8:
41+
if torch_cuda_major < 11 or sys_cuda_major < 11:
42+
if verbose:
43+
self.warning("On Ampere and higher architectures please use CUDA 11+")
44+
cuda_okay = False
4445
return super().is_compatible(verbose) and cuda_okay
4546

4647
def filter_ccs(self, ccs):

op_builder/spatial_inference.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
# DeepSpeed Team
55

6+
import os
67
from .builder import CUDAOpBuilder, installed_cuda_version
78

89

@@ -26,15 +27,16 @@ def is_compatible(self, verbose=False):
2627
return False
2728

2829
cuda_okay = True
29-
if not self.is_rocm_pytorch() and torch.cuda.is_available():
30-
sys_cuda_major, _ = installed_cuda_version()
31-
torch_cuda_major = int(torch.version.cuda.split('.')[0])
32-
cuda_capability = torch.cuda.get_device_properties(0).major
33-
if cuda_capability >= 8:
34-
if torch_cuda_major < 11 or sys_cuda_major < 11:
35-
if verbose:
36-
self.warning("On Ampere and higher architectures please use CUDA 11+")
37-
cuda_okay = False
30+
if not os.environ.get("DS_IGNORE_CUDA_DETECTION"):
31+
if not self.is_rocm_pytorch() and torch.cuda.is_available():
32+
sys_cuda_major, _ = installed_cuda_version()
33+
torch_cuda_major = int(torch.version.cuda.split('.')[0])
34+
cuda_capability = torch.cuda.get_device_properties(0).major
35+
if cuda_capability >= 8:
36+
if torch_cuda_major < 11 or sys_cuda_major < 11:
37+
if verbose:
38+
self.warning("On Ampere and higher architectures please use CUDA 11+")
39+
cuda_okay = False
3840
return super().is_compatible(verbose) and cuda_okay
3941

4042
def sources(self):

op_builder/transformer_inference.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
# DeepSpeed Team
55

6+
import os
67
from .builder import CUDAOpBuilder, installed_cuda_version
78

89

@@ -26,19 +27,20 @@ def is_compatible(self, verbose=False):
2627
return False
2728

2829
cuda_okay = True
29-
if not self.is_rocm_pytorch() and torch.cuda.is_available():
30-
sys_cuda_major, _ = installed_cuda_version()
31-
torch_cuda_major = int(torch.version.cuda.split('.')[0])
32-
cuda_capability = torch.cuda.get_device_properties(0).major
33-
if cuda_capability < 6:
34-
if verbose:
35-
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
36-
cuda_okay = False
37-
if cuda_capability >= 8:
38-
if torch_cuda_major < 11 or sys_cuda_major < 11:
30+
if not os.environ.get("DS_IGNORE_CUDA_DETECTION"):
31+
if not self.is_rocm_pytorch() and torch.cuda.is_available():
32+
sys_cuda_major, _ = installed_cuda_version()
33+
torch_cuda_major = int(torch.version.cuda.split('.')[0])
34+
cuda_capability = torch.cuda.get_device_properties(0).major
35+
if cuda_capability < 6:
3936
if verbose:
40-
self.warning("On Ampere and higher architectures please use CUDA 11+")
37+
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
4138
cuda_okay = False
39+
if cuda_capability >= 8:
40+
if torch_cuda_major < 11 or sys_cuda_major < 11:
41+
if verbose:
42+
self.warning("On Ampere and higher architectures please use CUDA 11+")
43+
cuda_okay = False
4244
return super().is_compatible(verbose) and cuda_okay
4345

4446
def filter_ccs(self, ccs):

0 commit comments

Comments
 (0)