-
Notifications
You must be signed in to change notification settings - Fork 674
Closed
Labels
bug — Something isn't working
Description
Search before asking
- I have searched the RF-DETR issues and found no similar bug report.
Bug
What is the correct method for exporting the RF-DETR segmentation models to TensorRT? Specifically trying nano and small, my exported models (both with and without fp16, and with and without simplified onnx) perform extremely poorly getting confidence below 0.1 for persons detected.
I can see in the benchmarks on the website that you used TensorRT FP16, so how was this done while maintaining accuracy and performance? I've attached the script I've created for exporting to TensorRT currently.
Environment
RF-DETR Version: 1.4.1
OS: WSL2 Ubuntu: 24.04
Python version: 3.11.14
PyTorch Version: 2.8.0
Cuda version: 13.0
GPU: RTX 3080
Minimal Reproducible Example
import sys
import argparse
import os
import subprocess
from pathlib import Path
from copy import deepcopy

import torch

# Make a sibling "rf-detr" checkout importable when this script lives in a
# subdirectory of the project (e.g. <root>/scripts/export.py -> <root>/rf-detr).
project_root = Path(__file__).parent.parent.absolute()
rf_detr_path = project_root / "rf-detr"
if str(rf_detr_path) not in sys.path:
    sys.path.insert(0, str(rf_detr_path))
    print(f"Added {rf_detr_path} to sys.path")
def trtexec_fp32(onnx_path: str, verbose: bool = False) -> str:
    """Build a TensorRT engine from an ONNX model using ``trtexec`` in FP32.

    Mirrors the flags used by ``rfdetr/deploy/export.py`` but deliberately
    omits ``--fp16`` so the engine keeps full precision.

    Args:
        onnx_path: Path to the ONNX file; the engine is written next to it
            with the ``.onnx`` suffix replaced by ``.engine``.
        verbose: If True, pass ``--verbose`` through to trtexec.

    Returns:
        Path to the generated ``.engine`` file.

    Raises:
        RuntimeError: If ``trtexec`` is not installed or exits non-zero.
    """
    engine_path = onnx_path.replace(".onnx", ".engine")
    # Build the command as an argument list (no shell string): avoids
    # quoting/injection problems with paths containing spaces.
    command = [
        "trtexec",
        f"--onnx={onnx_path}",
        f"--saveEngine={engine_path}",
        "--memPoolSize=workspace:4096",  # NOTE: intentionally no --fp16 flag
        "--useCudaGraph",
        "--useSpinWait",
        "--warmUp=500",
        "--avgRuns=1000",
        "--duration=10",
    ]
    if verbose:
        command.append("--verbose")
    print(f"Running trtexec command:\n{' '.join(command)}\n")
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError as err:
        # With shell=True a missing binary surfaced as a non-zero exit code;
        # preserve the same RuntimeError contract for callers.
        print("TensorRT build failed: trtexec not found on PATH")
        raise RuntimeError("trtexec failed") from err
    print(result.stdout)
    if result.returncode != 0:
        print(f"TensorRT build failed:\n{result.stderr}")
        raise RuntimeError("trtexec failed")
    print(f"TensorRT engine saved to: {engine_path}")
    return engine_path
def main():
    """Export an RF-DETR segmentation model to ONNX and optionally TensorRT.

    Command-line driven: selects a model size, exports it to ONNX via the
    library's lower-level export API (workaround for a segmentation bug in
    ``Model.export()``), optionally simplifies the ONNX, and optionally
    builds a TensorRT engine in FP16 (library helper) or FP32 (local helper).
    """
    parser = argparse.ArgumentParser(description="Export RF-DETR model using official API")
    parser.add_argument("--output_dir", type=str, default="export_output_official", help="Output directory")
    parser.add_argument("--simplify", action="store_true", default=False, help="Simplify ONNX model (default: False)")
    parser.add_argument("--tensorrt", action="store_true", help="Build TensorRT engine after ONNX export")
    parser.add_argument("--fp16", action="store_true", help="Use FP16 for TensorRT (default: FP32)")
    parser.add_argument("--model", type=str, default="small",
                        choices=["nano", "small", "medium", "large"],
                        help="Model size to export (default: small)")
    parser.add_argument("--verbose", action="store_true", help="Verbose output")
    args = parser.parse_args()

    # Import only the requested model class so the other variants' weights
    # are never touched.
    print(f"Loading RFDETRSeg{args.model.capitalize()} model...")
    if args.model == "nano":
        from rfdetr import RFDETRSegNano
        model = RFDETRSegNano()
    elif args.model == "small":
        from rfdetr import RFDETRSegSmall
        model = RFDETRSegSmall()
    elif args.model == "medium":
        from rfdetr import RFDETRSegMedium
        model = RFDETRSegMedium()
    elif args.model == "large":
        from rfdetr import RFDETRSegLarge
        model = RFDETRSegLarge()

    print(f"Model loaded. Resolution: {model.model_config.resolution}")
    print(f"Pretrain weights: {model.model_config.pretrain_weights}")

    # Export using lower-level API (workaround for segmentation bug in Model.export())
    from rfdetr.deploy.export import export_onnx, make_infer_image, onnx_simplify, trtexec

    output_dir = Path(args.output_dir)
    os.makedirs(output_dir, exist_ok=True)
    resolution = model.model_config.resolution
    shape = (resolution, resolution)

    # Work on a deep copy of the inner nn.Module so the wrapper object stays
    # usable after export-mode mutation.
    inner_model = deepcopy(model.model.model)
    inner_model.eval()
    if hasattr(inner_model, 'export'):
        inner_model.export()  # switch the module to export mode

    # Dummy input tensor (batch of 1 on CPU) for tracing.
    input_tensors = make_infer_image(None, shape, 1, "cpu")
    inner_model.cpu()

    input_names = ['input']
    output_names = ['dets', 'labels', 'masks']  # segmentation model outputs

    print(f"\nExporting to ONNX in '{output_dir}'...")
    output_file = export_onnx(
        output_dir=str(output_dir),
        model=inner_model,
        input_names=input_names,
        input_tensors=input_tensors,
        output_names=output_names,
        dynamic_axes=None,
        backbone_only=False,
        verbose=args.verbose,
        opset_version=17
    )
    print(f"ONNX export complete: {output_file}")

    # Optional ONNX graph simplification.
    if args.simplify:
        print("\nSimplifying ONNX model...")
        output_file = onnx_simplify(
            onnx_dir=output_file,
            input_names=input_names,
            input_tensors=input_tensors,
            force=True
        )
        print(f"Simplified ONNX: {output_file}")

    # Optional TensorRT engine build.
    if args.tensorrt:
        print("\nBuilding TensorRT engine...")
        if args.fp16:
            # The library's trtexec helper hardcodes --fp16; it expects an
            # args-like object with these attributes.
            class TRTArgs:
                verbose = args.verbose
                profile = False
                dry_run = False
            trtexec(output_file, TRTArgs())
        else:
            # Local FP32 variant (no --fp16 flag).
            trtexec_fp32(output_file, verbose=args.verbose)
        print("\nTensorRT engine build complete!")

    print("\nDone!")
# Script entry point.
if __name__ == "__main__":
    main()
Additional
No response
Are you willing to submit a PR?
- Yes, I'd like to help by submitting a PR!
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bug — Something isn't working