[X86] isLoadBitCastBeneficial - its only beneficial to bitcast between vector types if the new type is legal (#171813)

RKSimon · web-flow · commit cdce445c9c7a · 2025-12-11T14:11:33.000Z
Prevents us from attempting to store illegal types like <2 x i128> that will force scalarization/splitting Noticed while trying to avoid some split stores mentioned in #171616
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3489,10 +3489,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
   if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
     return false;
 
-  // If both types are legal vectors, it's always ok to convert them.
-  if (LoadVT.isVector() && BitcastVT.isVector() &&
-      isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
-    return true;
+  if (LoadVT.isVector() && BitcastVT.isVector()) {
+    // If both types are legal vectors, it's always ok to convert them.
+    // Don't convert to an illegal type.
+    if (isTypeLegal(LoadVT))
+      return isTypeLegal(BitcastVT);
+  }
 
   // If we have a large vector type (even if illegal), don't bitcast to large
   // (illegal) scalar types. Better to load fewer vectors and extract.
diff --git a/llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll b/llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll
@@ -20,25 +20,22 @@ define void @load_single_128bit_elt_vector(ptr %in, ptr %off, ptr %out) nounwind
 ; AVX-LABEL: load_single_128bit_elt_vector:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovaps (%rdi), %xmm0
-; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vmovaps %xmm1, 16(%rdx)
-; AVX-NEXT:    vmovaps %xmm0, (%rdx)
+; AVX-NEXT:    vmovaps %ymm0, (%rdx)
+; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;
 ; AVX2-LABEL: load_single_128bit_elt_vector:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovaps (%rdi), %xmm0
-; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vmovaps %xmm1, 16(%rdx)
-; AVX2-NEXT:    vmovaps %xmm0, (%rdx)
+; AVX2-NEXT:    vmovaps %ymm0, (%rdx)
+; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_single_128bit_elt_vector:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vmovaps %xmm1, 16(%rdx)
-; AVX512F-NEXT:    vmovaps %xmm0, (%rdx)
+; AVX512F-NEXT:    vmovaps %ymm0, (%rdx)
+; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
   %i0 = load <16 x i8>, ptr %in, align 64
   %i1 = bitcast <16 x i8> %i0 to <1 x i128>
@@ -112,9 +109,7 @@ define void @load_single_256bit_elt_vector(ptr %in, ptr %off, ptr %out) nounwind
 ; AVX512F-LABEL: load_single_256bit_elt_vector:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vmovaps (%rdi), %ymm0
-; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vmovaps %ymm1, 32(%rdx)
-; AVX512F-NEXT:    vmovaps %ymm0, (%rdx)
+; AVX512F-NEXT:    vmovaps %zmm0, (%rdx)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
   %i0 = load <32 x i8>, ptr %in, align 64