Skip to content

Commit 17cd7ed

Browse files
authored
Bug fixes (#453)
1 parent a765807 commit 17cd7ed

File tree

6 files changed

+37
-20
lines changed

6 files changed

+37
-20
lines changed

fast_llm/data/dataset/sampled.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ def __getitem__(self, index: int) -> SampleType:
367367

368368
document_sampling_index = token_start_cumsum_index * TOKEN_CUMSUM_RATE + token_start_array_document_offset
369369

370-
token_count = token_start_array[token_start_cumsum_index]
370+
token_count = token_start_array[token_start_cumsum_index].item()
371371

372372
documents: list[SampleType] = []
373373
while token_count < token_end:

fast_llm_external_models/apriel2/modeling_apriel2.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,7 @@
77

88
import torch
99
import torch.nn.functional as F
10-
from causal_conv1d import causal_conv1d_fn as _causal_conv1d_fn
11-
from causal_conv1d import causal_conv1d_update as _causal_conv1d_update
1210
from einops import rearrange, repeat
13-
from mamba_ssm.ops.selective_scan_interface import selective_scan_fn
14-
from mamba_ssm.ops.triton.selective_state_update import selective_state_update
1511
from torch import nn
1612
from transformers import GenerationMixin, PreTrainedModel
1713
from transformers.cache_utils import Cache
@@ -52,6 +48,19 @@
5248
fused_recurrent_kda = None
5349
fused_kda_gate = None
5450

51+
52+
try:
53+
from causal_conv1d import causal_conv1d_fn as _causal_conv1d_fn
54+
from causal_conv1d import causal_conv1d_update as _causal_conv1d_update
55+
from mamba_ssm.ops.selective_scan_interface import selective_scan_fn
56+
from mamba_ssm.ops.triton.selective_state_update import selective_state_update
57+
except ImportError:
58+
_causal_conv1d_fn = None
59+
_causal_conv1d_update = None
60+
selective_scan_fn = None
61+
selective_state_update = None
62+
63+
5564
is_fast_path_available = is_mamba_ssm_available() and is_causal_conv1d_available()
5665

5766
if is_torch_flex_attn_available():
@@ -489,14 +498,6 @@ class PreprocessingOutput(TypedDict, total=False):
489498
attention_mask: Optional[torch.Tensor]
490499

491500

492-
# Require fast path CUDA kernels - no silent fallback to unoptimized code paths
493-
if not is_fast_path_available:
494-
raise ImportError(
495-
"CausalConv1d and Mamba require CUDA kernels from causal_conv1d and mamba_ssm. "
496-
"Install with: pip install causal-conv1d mamba-ssm"
497-
)
498-
499-
500501
class CausalConv1d(nn.Conv1d):
501502
"""
502503
Causal 1D convolution that pads only on the left side.
@@ -519,6 +520,11 @@ def __init__(
519520
activation: str = "silu",
520521
**kwargs,
521522
):
523+
if not is_fast_path_available:
524+
raise ImportError(
525+
"CausalConv1d requires CUDA kernels from causal_conv1d and mamba_ssm. "
526+
"Install with: pip install causal-conv1d mamba-ssm"
527+
)
522528
# Remove padding from kwargs since we handle it ourselves
523529
kwargs.pop("padding", None)
524530
super().__init__(

fast_llm_external_models/tests/test_apriel2/test_conversion_e2e.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def expand_surgery_chain_with_cycling(
138138
return expanded
139139

140140

141+
@requires_cuda
141142
class TestPlanCompositionTorture:
142143
"""End-to-end torture test for plan composition.
143144

fast_llm_external_models/tests/test_apriel2/test_model_structure.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@
22

33
import torch
44

5-
from fast_llm_external_models.apriel2.modeling_apriel2 import Apriel2Cache, _AttentionCache, _SSMCache
6-
from fast_llm_external_models.apriel2.modeling_apriel2 import Apriel2ForCausalLM
5+
from fast_llm_external_models.apriel2.modeling_apriel2 import (
6+
Apriel2Cache,
7+
Apriel2ForCausalLM,
8+
_AttentionCache,
9+
_SSMCache,
10+
)
11+
from fast_llm_external_models.tests.test_apriel2.conftest import requires_cuda
712

813

14+
@requires_cuda
915
class TestStochasticMixerStructure:
1016
"""Validate stochastic mixer architecture matches configuration."""
1117

fast_llm_external_models/tests/test_apriel2/test_modeling.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import torch
55

66
from fast_llm_external_models.apriel2.modeling_apriel2 import Apriel2ForCausalLM
7+
from fast_llm_external_models.tests.test_apriel2.conftest import requires_cuda
78

89

910
class TestApriel2Modeling:
@@ -13,9 +14,9 @@ class TestApriel2Modeling:
1314
"config_name",
1415
[
1516
"apriel2_config_tiny",
16-
"apriel2_config_stochastic",
17-
"apriel2_config_multi_mixer",
18-
"apriel2_config_all_mixers", # Tests all 4 mixer types
17+
pytest.param("apriel2_config_stochastic", marks=requires_cuda),
18+
pytest.param("apriel2_config_multi_mixer", marks=requires_cuda),
19+
pytest.param("apriel2_config_all_mixers", marks=requires_cuda), # Tests all 4 mixer types
1920
"apriel2_config_with_bias", # Tests per-layer bias and non-gated MLP
2021
],
2122
)

tests/layers/test_ssm.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010
from fast_llm.layers.ssm import kda as kda_module
1111
from fast_llm.layers.ssm.config import GatedDeltaNetConfig, KimiDeltaAttentionConfig, MambaConfig
1212
from fast_llm.utils import Assert
13-
from fast_llm_external_models.apriel2.modeling_apriel2 import Apriel2GatedDeltaNet, Apriel2Mamba, KimiDeltaAttention
1413
from tests.utils.utils import get_stage, requires_cuda
1514

1615
try:
17-
from fast_llm_external_models.apriel2.modeling_apriel2 import Apriel2GatedDeltaNet, Apriel2Mamba
16+
from fast_llm_external_models.apriel2.modeling_apriel2 import (
17+
Apriel2GatedDeltaNet,
18+
Apriel2Mamba,
19+
KimiDeltaAttention,
20+
)
1821
except ImportError:
1922
Apriel2GatedDeltaNet = None
2023
Apriel2Mamba = None

0 commit comments

Comments (0)