[llvm] 9f5d783 - [X86][SSE] combineScalarToVector - only reuse broadcasts for scalar_to_vector if the source operands scalar types match
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 2 14:06:02 PDT 2021
Author: Simon Pilgrim
Date: 2021-06-02T22:05:40+01:00
New Revision: 9f5d783d467523d6218a63883b45192c8a875c96
URL: https://github.com/llvm/llvm-project/commit/9f5d783d467523d6218a63883b45192c8a875c96
DIFF: https://github.com/llvm/llvm-project/commit/9f5d783d467523d6218a63883b45192c8a875c96.diff
LOG: [X86][SSE] combineScalarToVector - only reuse broadcasts for scalar_to_vector if the source operands scalar types match
We were hitting an issue when the scalar_to_vector source was being implicitly truncated (in this case from i8 to vXi1) while the same i8 source was also being used in a broadcast to a vXi8 value.
Fixes PR50374
Added:
llvm/test/CodeGen/X86/pr50374.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1fcb52aee83b..a6582879f6f3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50473,16 +50473,20 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
// See if we're broadcasting the scalar value, in which case just reuse that.
// Ensure the same SDValue from the SDNode use is being used.
- for (SDNode *User : Src->uses())
- if (User->getOpcode() == X86ISD::VBROADCAST && Src == User->getOperand(0)) {
- unsigned SizeInBits = VT.getFixedSizeInBits();
- unsigned BroadcastSizeInBits = User->getValueSizeInBits(0).getFixedSize();
- if (BroadcastSizeInBits == SizeInBits)
- return SDValue(User, 0);
- if (BroadcastSizeInBits > SizeInBits)
- return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
- // TODO: Handle BroadcastSizeInBits < SizeInBits when we have test coverage.
- }
+ if (VT.getScalarType() == Src.getValueType())
+ for (SDNode *User : Src->uses())
+ if (User->getOpcode() == X86ISD::VBROADCAST &&
+ Src == User->getOperand(0)) {
+ unsigned SizeInBits = VT.getFixedSizeInBits();
+ unsigned BroadcastSizeInBits =
+ User->getValueSizeInBits(0).getFixedSize();
+ if (BroadcastSizeInBits == SizeInBits)
+ return SDValue(User, 0);
+ if (BroadcastSizeInBits > SizeInBits)
+ return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
+ // TODO: Handle BroadcastSizeInBits < SizeInBits when we have test
+ // coverage.
+ }
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/pr50374.ll b/llvm/test/CodeGen/X86/pr50374.ll
new file mode 100644
index 000000000000..fe1286d2ab12
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr50374.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s
+
+define void @PR50374() {
+; CHECK-LABEL: PR50374:
+; CHECK: # %bb.0: # %while.84.body.preheader
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_1: # %vector.body1999
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp .LBB0_1
+while.84.body.preheader:
+ %0 = load i8, i8* undef, align 1
+ %1 = load i8, i8* undef, align 4
+ %.not14.2.2 = icmp eq i8 0, 0
+ %2 = and i8 %0, 1
+ %.not14.1.2 = icmp eq i8 %2, 0
+ %3 = select i1 %.not14.2.2, i1 %.not14.1.2, i1 false
+ %.not14.2361 = icmp eq i8 0, 0
+ %4 = select i1 %3, i1 %.not14.2361, i1 false
+ %add.10961.i.2.2 = select reassoc nsz contract i1 %4, float 0.000000e+00, float 0x7FF8000000000000
+ %broadcast.splatinsert2024 = insertelement <8 x float> poison, float %add.10961.i.2.2, i32 0
+ %broadcast.splat2025 = shufflevector <8 x float> %broadcast.splatinsert2024, <8 x float> poison, <8 x i32> zeroinitializer
+ %broadcast.splatinsert2049 = insertelement <8 x i8> poison, i8 %1, i32 0
+ %broadcast.splat2050 = shufflevector <8 x i8> %broadcast.splatinsert2049, <8 x i8> poison, <8 x i32> zeroinitializer
+ br label %vector.body1999
+
+vector.body1999: ; preds = %vector.body1999, %while.84.body.preheader
+ %predphi2026 = select <8 x i1> undef, <8 x float> undef, <8 x float> %broadcast.splat2025
+ %predphi2051 = select <8 x i1> undef, <8 x i8> %broadcast.splat2050, <8 x i8> undef
+ br label %vector.body1999
+}
More information about the llvm-commits
mailing list