Skip to content

Commit cdce445

Browse files
authored
[X86] isLoadBitCastBeneficial - it's only beneficial to bitcast between vector types if the new type is legal (#171813)
Prevents us from attempting to store illegal types like <2 x i128> that will force scalarization/splitting. Noticed while trying to avoid some split stores mentioned in #171616.
1 parent f582fc6 commit cdce445

File tree

2 files changed

+13
-16
lines changed

2 files changed

+13
-16
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3489,10 +3489,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
34893489
if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
34903490
return false;
34913491

3492-
// If both types are legal vectors, it's always ok to convert them.
3493-
if (LoadVT.isVector() && BitcastVT.isVector() &&
3494-
isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
3495-
return true;
3492+
if (LoadVT.isVector() && BitcastVT.isVector()) {
3493+
// If both types are legal vectors, it's always ok to convert them.
3494+
// Don't convert to an illegal type.
3495+
if (isTypeLegal(LoadVT))
3496+
return isTypeLegal(BitcastVT);
3497+
}
34963498

34973499
// If we have a large vector type (even if illegal), don't bitcast to large
34983500
// (illegal) scalar types. Better to load fewer vectors and extract.

llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,22 @@ define void @load_single_128bit_elt_vector(ptr %in, ptr %off, ptr %out) nounwind
2020
; AVX-LABEL: load_single_128bit_elt_vector:
2121
; AVX: # %bb.0:
2222
; AVX-NEXT: vmovaps (%rdi), %xmm0
23-
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
24-
; AVX-NEXT: vmovaps %xmm1, 16(%rdx)
25-
; AVX-NEXT: vmovaps %xmm0, (%rdx)
23+
; AVX-NEXT: vmovaps %ymm0, (%rdx)
24+
; AVX-NEXT: vzeroupper
2625
; AVX-NEXT: retq
2726
;
2827
; AVX2-LABEL: load_single_128bit_elt_vector:
2928
; AVX2: # %bb.0:
3029
; AVX2-NEXT: vmovaps (%rdi), %xmm0
31-
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
32-
; AVX2-NEXT: vmovaps %xmm1, 16(%rdx)
33-
; AVX2-NEXT: vmovaps %xmm0, (%rdx)
30+
; AVX2-NEXT: vmovaps %ymm0, (%rdx)
31+
; AVX2-NEXT: vzeroupper
3432
; AVX2-NEXT: retq
3533
;
3634
; AVX512F-LABEL: load_single_128bit_elt_vector:
3735
; AVX512F: # %bb.0:
3836
; AVX512F-NEXT: vmovaps (%rdi), %xmm0
39-
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
40-
; AVX512F-NEXT: vmovaps %xmm1, 16(%rdx)
41-
; AVX512F-NEXT: vmovaps %xmm0, (%rdx)
37+
; AVX512F-NEXT: vmovaps %ymm0, (%rdx)
38+
; AVX512F-NEXT: vzeroupper
4239
; AVX512F-NEXT: retq
4340
%i0 = load <16 x i8>, ptr %in, align 64
4441
%i1 = bitcast <16 x i8> %i0 to <1 x i128>
@@ -112,9 +109,7 @@ define void @load_single_256bit_elt_vector(ptr %in, ptr %off, ptr %out) nounwind
112109
; AVX512F-LABEL: load_single_256bit_elt_vector:
113110
; AVX512F: # %bb.0:
114111
; AVX512F-NEXT: vmovaps (%rdi), %ymm0
115-
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
116-
; AVX512F-NEXT: vmovaps %ymm1, 32(%rdx)
117-
; AVX512F-NEXT: vmovaps %ymm0, (%rdx)
112+
; AVX512F-NEXT: vmovaps %zmm0, (%rdx)
118113
; AVX512F-NEXT: vzeroupper
119114
; AVX512F-NEXT: retq
120115
%i0 = load <32 x i8>, ptr %in, align 64

0 commit comments

Comments
 (0)