auto_round/utils/model.py (4 changes: 3 additions & 1 deletion)
@@ -22,6 +22,7 @@
 
 import torch
 import transformers
+from transformers.modeling_utils import no_init_weights as skip_weights_initialize
 
 from auto_round import envs
 from auto_round.export.export_to_gguf.config import ModelType
@@ -1002,7 +1003,8 @@ def convert_fp8_layer_to_linear(layer, dtype=torch.bfloat16, device: str = "cpu"
     """ """
Copilot AI Jan 19, 2026

Empty docstring should be removed or replaced with meaningful documentation explaining the function's purpose, parameters, and return value.
Suggested change
-    """ """
+    """
+    Convert an FP8-quantized linear-like layer to a standard torch.nn.Linear layer
+    in a higher-precision dtype by dequantizing its weights and copying metadata.
+
+    This helper is intended for layers produced by AutoRound quantization, such as
+    regular FP8 linear layers or `CompressedLinear` layers with an attached
+    compressor. It reconstructs a dense Linear layer with dequantized weights and
+    preserves relevant attributes from the original layer (e.g. QuantizationScheme
+    fields, temporary names, and scale dtype).
+
+    Args:
+        layer: The source FP8-quantized layer instance to convert. It is expected
+            to have `in_features`, `out_features`, an optional `bias`, and either
+            a `compressor.decompress_module` method (for `CompressedLinear`) or
+            FP8 weight/scale attributes (`weight`, `weight_scale` or
+            `weight_scale_inv`, and `block_size`).
+        dtype: The target floating-point dtype for the new Linear layer weights
+            and bias. Defaults to torch.bfloat16.
+        device (str): Device on which to place the source layer before
+            dequantization (e.g. "cpu", "cuda").
+
+    Returns:
+        torch.nn.Linear: A new Linear layer with dequantized weights in the given
+            dtype and copied bias and quantization-related attributes.
+    """

     from auto_round.schemes import QuantizationScheme
 
-    new_layer = torch.nn.Linear(layer.in_features, layer.out_features, bias=layer.bias is not None, dtype=dtype)
+    with skip_weights_initialize():
+        new_layer = torch.nn.Linear(layer.in_features, layer.out_features, bias=layer.bias is not None, dtype=dtype)
     if layer.bias is not None:
         new_layer.bias.data.copy_(layer.bias.data.to(dtype=dtype))
     scheme_keys = (f.name for f in fields(QuantizationScheme))
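The functional change above wraps the Linear construction in transformers' `no_init_weights` context manager (imported as `skip_weights_initialize`), so the freshly allocated weight tensor is not randomly initialized only to be overwritten with dequantized data a moment later. A minimal sketch of the pattern, assuming a recent transformers release where the context manager temporarily turns `torch.nn.init.*` calls into no-ops; the helper name and sizes are illustrative:

```python
import torch
from transformers.modeling_utils import no_init_weights as skip_weights_initialize

def linear_for_overwrite(in_features: int, out_features: int,
                         bias: bool = True,
                         dtype: torch.dtype = torch.bfloat16) -> torch.nn.Linear:
    # Inside the context, the torch.nn.init calls made by Linear.reset_parameters()
    # are no-ops, so the layer keeps its uninitialized torch.empty() storage.
    # The caller MUST fill weight (and bias) before using the layer for anything.
    with skip_weights_initialize():
        return torch.nn.Linear(in_features, out_features, bias=bias, dtype=dtype)

layer = linear_for_overwrite(4096, 4096)
layer.weight.data.copy_(torch.randn(4096, 4096, dtype=torch.bfloat16))  # stand-in for dequantized FP8 data
layer.bias.data.zero_()
```

Skipping the Kaiming initialization matters mainly for large models, where re-initializing every converted layer on CPU spends measurable time computing values that are immediately discarded.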