-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Open
Description
The function below compiles to worse code on targets that have v_fmac_f64: https://godbolt.org/z/aq5e6efa3
The gfx90a compile uses the two-address v_fmac_f64 and incurs a cost of 2 extra copies at the function return compared to gfx900, which does not have the instruction (and instead uses the three-address v_fma_f64).
; AMDGPU (amdgcn) target with the amdhsa OS. No subtarget is fixed in the IR
; itself; presumably gfx900 / gfx90a is selected on the command line (e.g.
; -mcpu) when reproducing -- confirm against the linked compile.
target triple = "amdgcn-amd-amdhsa"
; Worse code when v_fmac_f64 is available.
;
; Reproducer: takes the rsq intrinsic result for %x and applies a polynomial
; correction built from three llvm.fma calls. Written out, the returned value is
;   e      = 1.0 - cond * i * i          (computed as fma(i * -cond, i, 1.0))
;   result = i + (i * e) * (0.375 * e + 0.5)
; where i = rsq(x) and cond is either i or x (see the select below).
; Note that %i feeds several multiplies and is also the addend of the final
; fma; the instruction order and operand order here are what the report is
; about, so do not reorder or simplify when reducing further.
define double @bad_2addr_mac_f64(double %x) {
entry:
; Initial estimate; reused by every later step.
%i = tail call double @llvm.amdgcn.rsq.f64(double %x)
; Class mask 608 = 512 + 64 + 32 = +inf | +zero | -zero (LangRef fpclass bits):
; true when %x is +infinity or either zero.
%or.cond = tail call i1 @llvm.is.fpclass.f64(double %x, i32 608)
; For those special inputs substitute the rsq result for %x in the error term.
%cond = select i1 %or.cond, double %i, double %x
%fneg = fneg double %cond
; %mul = -(cond * i)
%mul = fmul double %i, %fneg
; %i1 = 1.0 - cond * i * i  (the error term e)
%i1 = tail call double @llvm.fma.f64(double %mul, double %i, double 1.000000e+00)
; %mul2 = i * e
%mul2 = fmul double %i, %i1
; %i2 = 0.375 * e + 0.5
%i2 = tail call double @llvm.fma.f64(double %i1, double 3.750000e-01, double 5.000000e-01)
; Final fma: (i * e) * (0.375 * e + 0.5) + i, with %i as the addend operand.
%i3 = tail call double @llvm.fma.f64(double %mul2, double %i2, double %i)
ret double %i3
}
; Declarations for the three intrinsics called by @bad_2addr_mac_f64.
; The AMDGPU rsq intrinsic uses attribute group #0; the two generic intrinsics
; (fma and is.fpclass) use group #1.
declare double @llvm.amdgcn.rsq.f64(double) #0
declare double @llvm.fma.f64(double, double, double) #1
; The i32 class mask must be an immediate (immarg).
declare i1 @llvm.is.fpclass.f64(double, i32 immarg) #1
; Groups #0 and #1 are identical except that #1 additionally carries
; nocreateundeforpoison.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }