[llvm] ce2ec06 - [X86] Only fold broadcast with extract_vector_elt/scalar_to_vector if the scalar type matches the vector element type
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 5 08:01:46 PDT 2023
Author: Simon Pilgrim
Date: 2023-08-05T16:01:22+01:00
New Revision: ce2ec065165b9c675394e182717b5dec8f3b0535
URL: https://github.com/llvm/llvm-project/commit/ce2ec065165b9c675394e182717b5dec8f3b0535
DIFF: https://github.com/llvm/llvm-project/commit/ce2ec065165b9c675394e182717b5dec8f3b0535.diff
LOG: [X86] Only fold broadcast with extract_vector_elt/scalar_to_vector if the scalar type matches the vector element type
Avoid handling implicit extension/truncation with scalar<->vector transfers.
Fixes #64439.
Added:
llvm/test/CodeGen/X86/pr64439.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 524736dd3b416b..ab36260ee658d6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39683,12 +39683,15 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
extract128BitVector(Src, 0, DAG, DL));
// broadcast(scalar_to_vector(x)) -> broadcast(x).
- if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Src.getValueType().getScalarType() == Src.getOperand(0).getValueType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
// broadcast(extract_vector_elt(x, 0)) -> broadcast(x).
if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(Src.getOperand(1)) &&
+ Src.getValueType() ==
+ Src.getOperand(0).getValueType().getScalarType() &&
DAG.getTargetLoweringInfo().isTypeLegal(
Src.getOperand(0).getValueType()))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
diff --git a/llvm/test/CodeGen/X86/pr64439.ll b/llvm/test/CodeGen/X86/pr64439.ll
new file mode 100644
index 00000000000000..7aa52fc49a9fcf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr64439.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+
+define void @f(ptr %0, <32 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
+; CHECK-NEXT: vpmovb2m %ymm0, %k0
+; CHECK-NEXT: kshiftrd $3, %k0, %k1
+; CHECK-NEXT: kmovd %k1, %eax
+; CHECK-NEXT: vpbroadcastb %esi, %ymm0
+; CHECK-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
+; CHECK-NEXT: vpmovm2b %k0, %ymm0
+; CHECK-NEXT: vpbroadcastb %eax, %ymm0 {%k1}
+; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
+; CHECK-NEXT: vpmovb2m %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %E = extractelement <32 x i1> %1, i32 3
+ %I = insertelement <32 x i1> %1, i1 %E, i32 %2
+ store <32 x i1> %I, ptr %0
+ ret void
+}
More information about the llvm-commits mailing list