[llvm] ce2ec06 - [X86] Only fold broadcast with extract_vector_elt/scalar_to_vector if the scalar type matches the vector element type

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 5 08:01:46 PDT 2023


Author: Simon Pilgrim
Date: 2023-08-05T16:01:22+01:00
New Revision: ce2ec065165b9c675394e182717b5dec8f3b0535

URL: https://github.com/llvm/llvm-project/commit/ce2ec065165b9c675394e182717b5dec8f3b0535
DIFF: https://github.com/llvm/llvm-project/commit/ce2ec065165b9c675394e182717b5dec8f3b0535.diff

LOG: [X86] Only fold broadcast with extract_vector_elt/scalar_to_vector if the scalar type matches the vector element type

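Avoid handling the implicit extension/truncation that can occur in scalar<->vector transfers (an EXTRACT_VECTOR_ELT result or SCALAR_TO_VECTOR operand may be wider than the vector element type).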

Fixes #64439

Added: 
    llvm/test/CodeGen/X86/pr64439.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 524736dd3b416b..ab36260ee658d6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39683,12 +39683,15 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
                          extract128BitVector(Src, 0, DAG, DL));
 
     // broadcast(scalar_to_vector(x)) -> broadcast(x).
-    if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+        Src.getValueType().getScalarType() == Src.getOperand(0).getValueType())
       return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
 
     // broadcast(extract_vector_elt(x, 0)) -> broadcast(x).
     if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
         isNullConstant(Src.getOperand(1)) &&
+        Src.getValueType() ==
+            Src.getOperand(0).getValueType().getScalarType() &&
         DAG.getTargetLoweringInfo().isTypeLegal(
             Src.getOperand(0).getValueType()))
       return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));

diff  --git a/llvm/test/CodeGen/X86/pr64439.ll b/llvm/test/CodeGen/X86/pr64439.ll
new file mode 100644
index 00000000000000..7aa52fc49a9fcf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr64439.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+
+define void @f(ptr %0, <32 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovb2m %ymm0, %k0
+; CHECK-NEXT:    kshiftrd $3, %k0, %k1
+; CHECK-NEXT:    kmovd %k1, %eax
+; CHECK-NEXT:    vpbroadcastb %esi, %ymm0
+; CHECK-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
+; CHECK-NEXT:    vpmovm2b %k0, %ymm0
+; CHECK-NEXT:    vpbroadcastb %eax, %ymm0 {%k1}
+; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovb2m %ymm0, %k0
+; CHECK-NEXT:    kmovd %k0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %E = extractelement <32 x i1> %1, i32 3
+  %I = insertelement <32 x i1> %1, i1 %E, i32 %2
+  store <32 x i1> %I, ptr %0
+  ret void
+}


        


More information about the llvm-commits mailing list