auto_round/utils/model.py (4 changes: 3 additions & 1 deletion)
@@ -22,6 +22,7 @@
 
 import torch
 import transformers
+from transformers.modeling_utils import no_init_weights as skip_weights_initialize
 
 from auto_round import envs
 from auto_round.export.export_to_gguf.config import ModelType
@@ -1002,7 +1003,8 @@ def convert_fp8_layer_to_linear(layer, dtype=torch.bfloat16, device: str = "cpu"
     """ """
Copilot AI Jan 19, 2026

Empty docstring should be removed or replaced with meaningful documentation explaining the function's purpose, parameters, and return value.
Suggested change
-    """ """
+    """
+    Convert an FP8-quantized linear-like layer to a standard torch.nn.Linear layer
+    in a higher-precision dtype by dequantizing its weights and copying metadata.
+
+    This helper is intended for layers produced by AutoRound quantization, such as
+    regular FP8 linear layers or `CompressedLinear` layers with an attached
+    compressor. It reconstructs a dense Linear layer with dequantized weights and
+    preserves relevant attributes from the original layer (e.g. QuantizationScheme
+    fields, temporary names, and scale dtype).
+
+    Args:
+        layer: The source FP8-quantized layer instance to convert. It is expected
+            to have `in_features`, `out_features`, an optional `bias`, and either
+            a `compressor.decompress_module` method (for `CompressedLinear`) or
+            FP8 weight/scale attributes (`weight`, `weight_scale` or
+            `weight_scale_inv`, and `block_size`).
+        dtype: The target floating-point dtype for the new Linear layer weights
+            and bias. Defaults to torch.bfloat16.
+        device (str): Device on which to place the source layer before
+            dequantization (e.g. "cpu", "cuda").
+
+    Returns:
+        torch.nn.Linear: A new Linear layer with dequantized weights in the given
+            dtype and copied bias and quantization-related attributes.
+    """

     from auto_round.schemes import QuantizationScheme
 
-    new_layer = torch.nn.Linear(layer.in_features, layer.out_features, bias=layer.bias is not None, dtype=dtype)
+    with skip_weights_initialize():
+        new_layer = torch.nn.Linear(layer.in_features, layer.out_features, bias=layer.bias is not None, dtype=dtype)
     if layer.bias is not None:
         new_layer.bias.data.copy_(layer.bias.data.to(dtype=dtype))
     scheme_keys = (f.name for f in fields(QuantizationScheme))
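The functional change above wraps the Linear construction in transformers' `no_init_weights` context manager (imported as `skip_weights_initialize`), so the freshly allocated weight tensor is not randomly initialized only to be overwritten with dequantized data a moment later. A minimal sketch of the pattern, assuming a recent transformers release where the context manager temporarily turns `torch.nn.init.*` calls into no-ops; the helper name and sizes are illustrative:

```python
import torch
from transformers.modeling_utils import no_init_weights as skip_weights_initialize

def linear_for_overwrite(in_features: int, out_features: int,
                         bias: bool = True,
                         dtype: torch.dtype = torch.bfloat16) -> torch.nn.Linear:
    # Inside the context, the torch.nn.init calls made by Linear.reset_parameters()
    # are no-ops, so the layer keeps its uninitialized torch.empty() storage.
    # The caller MUST fill weight (and bias) before using the layer for anything.
    with skip_weights_initialize():
        return torch.nn.Linear(in_features, out_features, bias=bias, dtype=dtype)

layer = linear_for_overwrite(4096, 4096)
layer.weight.data.copy_(torch.randn(4096, 4096, dtype=torch.bfloat16))  # stand-in for dequantized FP8 data
layer.bias.data.zero_()
```

Skipping the Kaiming initialization matters mainly for large models, where re-initializing every converted layer on CPU spends measurable time computing values that are immediately discarded.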