Skip to content

Commit 2f9b8b7

Browse files
authored
[mlir][amdgpu] Continue lowering make_tdm_descriptor. (#171498)
* changes workgroup mask's type from i16 to vector<16xi1>
* changes pad_amount and pad_interval from Index to I32
* adds lit tests for padEnable, iteration and dynamic cases
* adds TODO for a future instrumentation pass to validate inputs
* adds descriptor groups 2 and 3
1 parent 854ef8d commit 2f9b8b7

File tree

5 files changed

+640
-99
lines changed

5 files changed

+640
-99
lines changed

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,14 @@ def AMDGPU_TDMDescriptorType : AMDGPU_Type<"TDMDescriptor", "tdm_descriptor"> {
110110
This type is opaque and corresponds to the two or four descriptor groups
111111
used in tensor_load_to_lds or tensor_store_from_lds.
112112
}];
113-
114113
}
115114

115+
// Type constraint for a fixed-length vector with exactly `length` elements of
// element type `elem`. It is also a BuildableType: the builder call constructs
// the matching `::mlir::VectorType` from `length` and `elem`'s own builder
// call.
class AMDGPU_ConcreteVector<Type elem, int length> :
116+
FixedVectorOfLengthAndType<[length], [elem]>,
117+
BuildableType<
118+
"::mlir::VectorType::get({" # length # "} ,"
119+
# elem.builderCall # ")">;
120+
116121
//===----------------------------------------------------------------------===//
117122
// AMDGPU Op definitions
118123
//===----------------------------------------------------------------------===//
@@ -1296,14 +1301,14 @@ def AMDGPU_MakeDmaDescriptorOp :
12961301
DenseI64ArrayAttr: $global_static_strides,
12971302
Variadic<Index>: $shared_dynamic_sizes,
12981303
DenseI64ArrayAttr: $shared_static_sizes,
1299-
Optional<I16>: $workgroup_mask,
1304+
Optional<AMDGPU_ConcreteVector<I1, 16>>: $workgroup_mask,
13001305
Optional<I1>: $early_timeout,
1301-
Optional<Index>: $pad_amount,
1302-
Optional<Index>: $pad_interval,
1306+
Optional<I32>: $pad_amount,
1307+
Optional<I32>: $pad_interval,
13031308
Optional<AnyMemRef>: $atomic_barrier_address,
13041309
Variadic<Index>: $atomic_barrier_indices,
13051310
Optional<Index>: $global_increment,
1306-
Optional<Index>: $lds_increment,
1311+
Optional<I32>: $lds_increment,
13071312
Optional<Index>: $iteration_count)>,
13081313
Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
13091314

@@ -1335,7 +1340,7 @@ def AMDGPU_MakeDmaDescriptorOp :
13351340
2D and 3D tensors may be iterated over by setting $global_increment, $lds_increment, and $iteration_count.
13361341
$global_increment determines how much to increment the starting global memory address per iteration in units of the $base's element type.
13371342
$lds_increment determines how much to increment the starting LDS address per iteration in units of the $base's element type.
1338-
$iterate_count determines how many times to iterate.
1343+
$iteration_count determines how many times to iterate; it must be a value in the inclusive interval [1, 256].
13391344

13401345
```mlir
13411346
// Example of moving a two-dimensional tensor to LDS.
@@ -1345,7 +1350,7 @@ def AMDGPU_MakeDmaDescriptorOp :
13451350

13461351
// Example of moving a two-dimensional tensor to LDS where padding is applied after every integer.
13471352
%base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
1348-
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padding(%pad_amount pad_every %pad_interval) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
1353+
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padShared(%pad_amount every %pad_interval) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
13491354
amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor
13501355
```
13511356
}];

0 commit comments

Comments
 (0)