Skip to content

Commit 2765113

Browse files
authored
AMDGPU: Drop and upgrade llvm.amdgcn.atomic.csub/cond.sub to atomicrmw (#105553)
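Both intrinsics perform a conditional unsigned subtraction: when the subtraction would underflow (the memory value is less than the data value), cond.sub yields the minuend unchanged and csub yields zero. These map to atomicrmw usub_cond and usub_sat respectively.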
1 parent 0a2e56d commit 2765113

File tree

16 files changed

+317
-733
lines changed

16 files changed

+317
-733
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,11 +1527,6 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
15271527

15281528
The iglp_opt strategy implementations are subject to change.
15291529

1530-
llvm.amdgcn.atomic.cond.sub.u32 Provides direct access to flat_atomic_cond_sub_u32, global_atomic_cond_sub_u32
1531-
and ds_cond_sub_u32 based on address space on gfx12 targets. This
1532-
performs a subtraction only if the memory value is greater than or
1533-
equal to the data value.
1534-
15351530
llvm.amdgcn.s.barrier.signal.isfirst Provides access to the s_barrier_signal_first instruction;
15361531
additionally ensures that the result value is valid even when the
15371532
intrinsic is used from a wave that is not running in a workgroup.

llvm/docs/ReleaseNotes.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ Changes to the AArch64 Backend
114114
Changes to the AMDGPU Backend
115115
-----------------------------
116116

117+
* Removed `llvm.amdgcn.atomic.cond.sub.u32` and
118+
`llvm.amdgcn.global.atomic.csub` intrinsics. Users should use the
119+
`atomicrmw` instruction with `usub_cond` and `usub_sat` instead.
120+
117121
Changes to the ARM Backend
118122
--------------------------
119123

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2888,8 +2888,6 @@ class AMDGPUAtomicRtn<LLVMType vt, LLVMType pt = llvm_anyptr_ty> : Intrinsic <
28882888
[IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree], "",
28892889
[SDNPMemOperand]>;
28902890

2891-
def int_amdgcn_global_atomic_csub : AMDGPUAtomicRtn<llvm_i32_ty>;
2892-
28932891
// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
28942892
// <ray_dir>, <ray_inv_dir>, <texture_descr>
28952893
// <node_ptr> is i32 or i64.
@@ -3137,8 +3135,6 @@ def int_amdgcn_flat_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
31373135
def int_amdgcn_global_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
31383136
def int_amdgcn_global_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
31393137

3140-
def int_amdgcn_atomic_cond_sub_u32 : AMDGPUAtomicRtn<llvm_i32_ty>;
3141-
31423138
class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
31433139
Intrinsic<
31443140
[llvm_any_ty],

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,9 +1274,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
12741274
}
12751275

12761276
if (Name.consume_front("atomic.")) {
1277-
if (Name.starts_with("inc") || Name.starts_with("dec")) {
1278-
// These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1279-
// there's no new declaration.
1277+
if (Name.starts_with("inc") || Name.starts_with("dec") ||
1278+
Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1279+
// These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1280+
// and usub_sat so there's no new declaration.
12801281
NewFn = nullptr;
12811282
return true;
12821283
}
@@ -4606,7 +4607,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
46064607
.StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
46074608
.StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
46084609
.StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4609-
.StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4610+
.StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4611+
.StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4612+
.StartsWith("atomic.csub", AtomicRMWInst::USubSat);
46104613

46114614
unsigned NumOperands = CI->getNumOperands();
46124615
if (NumOperands < 3) // Malformed bitcode.

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -636,15 +636,11 @@ multiclass local_addr_space_atomic_op {
636636
}
637637
}
638638

639-
defm int_amdgcn_global_atomic_csub : noret_op;
640639
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
641640
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
642641
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
643642
defm int_amdgcn_global_atomic_fmin_num : noret_op;
644643
defm int_amdgcn_global_atomic_fmax_num : noret_op;
645-
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
646-
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
647-
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;
648644

649645
multiclass noret_binary_atomic_op<SDNode atomic_op> {
650646
let HasNoUse = true in

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5309,12 +5309,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
53095309
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
53105310
break;
53115311
}
5312-
case Intrinsic::amdgcn_global_atomic_csub:
53135312
case Intrinsic::amdgcn_global_atomic_fmin_num:
53145313
case Intrinsic::amdgcn_global_atomic_fmax_num:
53155314
case Intrinsic::amdgcn_flat_atomic_fmin_num:
53165315
case Intrinsic::amdgcn_flat_atomic_fmax_num:
5317-
case Intrinsic::amdgcn_atomic_cond_sub_u32:
53185316
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
53195317
case Intrinsic::amdgcn_global_load_tr_b64:
53205318
case Intrinsic::amdgcn_global_load_tr_b128:

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,6 @@ def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
237237
def : SourceOfDivergence<int_r600_read_tidig_x>;
238238
def : SourceOfDivergence<int_r600_read_tidig_y>;
239239
def : SourceOfDivergence<int_r600_read_tidig_z>;
240-
def : SourceOfDivergence<int_amdgcn_atomic_cond_sub_u32>;
241-
def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
242240
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
243241
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>;
244242
def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -783,37 +783,20 @@ multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
783783

784784
multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
785785
RegisterOperand vdataClass,
786-
ValueType vdataType,
787-
SDPatternOperator atomic> {
786+
ValueType vdataType> {
788787
let FPAtomic = vdataType.isFP in {
789-
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass, 0,
790-
[(set vdataType:$vdata,
791-
(atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset),
792-
vdataType:$vdata_in))]>,
793-
MUBUFAddr64Table <0, NAME # "_RTN">;
794-
795-
def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, 0,
796-
[(set vdataType:$vdata,
797-
(atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset),
798-
vdataType:$vdata_in))]>,
799-
MUBUFAddr64Table <1, NAME # "_RTN">;
800-
788+
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass, 0>,
789+
MUBUFAddr64Table <0, NAME # "_RTN">;
790+
def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, 0>,
791+
MUBUFAddr64Table <1, NAME # "_RTN">;
801792
def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, 0>;
802793
def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, 0>;
803794
def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, 0>;
804795

805-
def _VBUFFER_OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.Offset, vdataClass, 1,
806-
[(set vdataType:$vdata,
807-
(atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset),
808-
vdataType:$vdata_in))]>,
809-
MUBUFAddr64Table <0, NAME # "_VBUFFER_RTN">;
810-
811-
def _VBUFFER_ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.Addr64, vdataClass, 1,
812-
[(set vdataType:$vdata,
813-
(atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset),
814-
vdataType:$vdata_in))]>,
815-
MUBUFAddr64Table <1, NAME # "_VBUFFER_RTN">;
816-
796+
def _VBUFFER_OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.Offset, vdataClass, 1>,
797+
MUBUFAddr64Table <0, NAME # "_VBUFFER_RTN">;
798+
def _VBUFFER_ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.Addr64, vdataClass, 1>,
799+
MUBUFAddr64Table <1, NAME # "_VBUFFER_RTN">;
817800
def _VBUFFER_OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.OffEn, vdataClass, 1>;
818801
def _VBUFFER_IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.IdxEn, vdataClass, 1>;
819802
def _VBUFFER_BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName #_vbuffer, BUFAddrKind.BothEn, vdataClass, 1>;
@@ -822,10 +805,9 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
822805

823806
multiclass MUBUF_Pseudo_Atomics <string opName,
824807
RegisterOperand vdataClass,
825-
ValueType vdataType,
826-
SDPatternOperator atomic = null_frag> :
808+
ValueType vdataType> :
827809
MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType>,
828-
MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;
810+
MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType>;
829811

830812

831813
//===----------------------------------------------------------------------===//
@@ -1096,7 +1078,7 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
10961078

10971079
let OtherPredicates = [HasGFX10_BEncoding] in {
10981080
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics <
1099-
"buffer_atomic_csub", VGPROp_32, i32, int_amdgcn_global_atomic_csub
1081+
"buffer_atomic_csub", VGPROp_32, i32
11001082
>;
11011083
}
11021084

@@ -1117,22 +1099,22 @@ def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc",
11171099
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
11181100

11191101
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
1120-
"buffer_atomic_fcmpswap", AVLdSt_64, v2f32, null_frag
1102+
"buffer_atomic_fcmpswap", AVLdSt_64, v2f32
11211103
>;
11221104
}
11231105

11241106
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
11251107
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
1126-
"buffer_atomic_fmin", AVLdSt_32, f32, null_frag
1108+
"buffer_atomic_fmin", AVLdSt_32, f32
11271109
>;
11281110
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
1129-
"buffer_atomic_fmax", AVLdSt_32, f32, null_frag
1111+
"buffer_atomic_fmax", AVLdSt_32, f32
11301112
>;
11311113
}
11321114

11331115
let SubtargetPredicate = isGFX6GFX7GFX10 in {
11341116
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
1135-
"buffer_atomic_fcmpswap_x2", VGPROp_128, v2f64, null_frag
1117+
"buffer_atomic_fcmpswap_x2", VGPROp_128, v2f64
11361118
>;
11371119
}
11381120

@@ -1201,12 +1183,12 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
12011183

12021184
let SubtargetPredicate = HasAtomicFaddRtnInsts in
12031185
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN<
1204-
"buffer_atomic_add_f32", AVLdSt_32, f32, null_frag
1186+
"buffer_atomic_add_f32", AVLdSt_32, f32
12051187
>;
12061188

12071189
let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
12081190
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN <
1209-
"buffer_atomic_pk_add_f16", AVLdSt_32, v2f16, null_frag
1191+
"buffer_atomic_pk_add_f16", AVLdSt_32, v2f16
12101192
>;
12111193

12121194
let SubtargetPredicate = isGFX12Plus in {

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -886,15 +886,6 @@ defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc_gfx9<"ds_sub_clamp_rtn_u32", VGPROp_3
886886
def DS_BPERMUTE_FI_B32 : DS_1A1D_PERMUTE <"ds_bpermute_fi_b32",
887887
int_amdgcn_ds_bpermute_fi_b32>;
888888

889-
multiclass DSAtomicRetNoRetPatIntrinsic_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
890-
ValueType vt, string frag> {
891-
def : DSAtomicRetPat<inst, vt,
892-
!cast<PatFrag>(frag#"_local_addrspace")>;
893-
def : DSAtomicRetPat<noRetInst, vt,
894-
!cast<PatFrag>(frag#"_noret_local_addrspace"), /* complexity */ 1>;
895-
}
896-
897-
defm : DSAtomicRetNoRetPatIntrinsic_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "int_amdgcn_atomic_cond_sub_u32">;
898889
} // let SubtargetPredicate = isGFX12Plus
899890

900891
let SubtargetPredicate = isGFX1250Plus in {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,10 +1562,6 @@ multiclass FlatAtomicNoRtnPatBase <string base_inst_name, string node, ValueType
15621562
}
15631563
}
15641564

1565-
multiclass FlatAtomicNoRtnPatWithAddrSpace<string inst, string node, string addrSpaceSuffix,
1566-
ValueType vt> :
1567-
FlatAtomicNoRtnPatBase<inst, node # "_noret_" # addrSpaceSuffix, vt, vt>;
1568-
15691565
multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt,
15701566
ValueType data_vt = vt, bit isIntr = 0> :
15711567
FlatAtomicNoRtnPatBase<inst, node # "_noret" # !if(isIntr, "", "_"#vt), vt, data_vt>;
@@ -1590,10 +1586,6 @@ multiclass FlatAtomicRtnPatBase <string inst_name, string node, ValueType vt,
15901586
}
15911587
}
15921588

1593-
multiclass FlatAtomicRtnPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
1594-
ValueType vt> :
1595-
FlatAtomicRtnPatBase<inst, intr # "_" # addrSpaceSuffix, vt, vt>;
1596-
15971589
multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt,
15981590
ValueType data_vt = vt, bit isIntr = 0> :
15991591
FlatAtomicRtnPatBase<inst, node # !if(isIntr, "", "_"#vt), vt, data_vt>;
@@ -2189,9 +2181,6 @@ let SubtargetPredicate = HasAtomicCondSubClampFlatInsts in {
21892181
defm : FlatStorePats <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
21902182
defm : FlatStorePats <FLAT_STORE_SHORT, store_flat, i16>;
21912183

2192-
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
2193-
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
2194-
21952184
let OtherPredicates = [HasD16LoadStore] in {
21962185
defm : FlatStorePats <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
21972186
defm : FlatStorePats <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;

0 commit comments

Comments
 (0)