[llvm] 9f5d783 - [X86][SSE] combineScalarToVector - only reuse broadcasts for scalar_to_vector if the source operands scalar types match

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 2 14:06:02 PDT 2021


Author: Simon Pilgrim
Date: 2021-06-02T22:05:40+01:00
New Revision: 9f5d783d467523d6218a63883b45192c8a875c96

URL: https://github.com/llvm/llvm-project/commit/9f5d783d467523d6218a63883b45192c8a875c96
DIFF: https://github.com/llvm/llvm-project/commit/9f5d783d467523d6218a63883b45192c8a875c96.diff

LOG: [X86][SSE] combineScalarToVector - only reuse broadcasts for scalar_to_vector if the source operands scalar types match

We were hitting an issue when the scalar_to_vector source was being implicitly truncated (in this case from i8 to a vXi1 element), but the same i8 source was also being used in a broadcast to a vXi8 value.

Fixes PR50374

Added: 
    llvm/test/CodeGen/X86/pr50374.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1fcb52aee83b..a6582879f6f3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50473,16 +50473,20 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
 
   // See if we're broadcasting the scalar value, in which case just reuse that.
   // Ensure the same SDValue from the SDNode use is being used.
-  for (SDNode *User : Src->uses())
-    if (User->getOpcode() == X86ISD::VBROADCAST && Src == User->getOperand(0)) {
-      unsigned SizeInBits = VT.getFixedSizeInBits();
-      unsigned BroadcastSizeInBits = User->getValueSizeInBits(0).getFixedSize();
-      if (BroadcastSizeInBits == SizeInBits)
-        return SDValue(User, 0);
-      if (BroadcastSizeInBits > SizeInBits)
-        return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
-      // TODO: Handle BroadcastSizeInBits < SizeInBits when we have test coverage.
-    }
+  if (VT.getScalarType() == Src.getValueType())
+    for (SDNode *User : Src->uses())
+      if (User->getOpcode() == X86ISD::VBROADCAST &&
+          Src == User->getOperand(0)) {
+        unsigned SizeInBits = VT.getFixedSizeInBits();
+        unsigned BroadcastSizeInBits =
+            User->getValueSizeInBits(0).getFixedSize();
+        if (BroadcastSizeInBits == SizeInBits)
+          return SDValue(User, 0);
+        if (BroadcastSizeInBits > SizeInBits)
+          return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
+        // TODO: Handle BroadcastSizeInBits < SizeInBits when we have test
+        // coverage.
+      }
 
   return SDValue();
 }

diff  --git a/llvm/test/CodeGen/X86/pr50374.ll b/llvm/test/CodeGen/X86/pr50374.ll
new file mode 100644
index 000000000000..fe1286d2ab12
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr50374.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s
+
+define void @PR50374() {
+; CHECK-LABEL: PR50374:
+; CHECK:       # %bb.0: # %while.84.body.preheader
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_1: # %vector.body1999
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jmp .LBB0_1
+while.84.body.preheader:
+  %0 = load i8, i8* undef, align 1
+  %1 = load i8, i8* undef, align 4
+  %.not14.2.2 = icmp eq i8 0, 0
+  %2 = and i8 %0, 1
+  %.not14.1.2 = icmp eq i8 %2, 0
+  %3 = select i1 %.not14.2.2, i1 %.not14.1.2, i1 false
+  %.not14.2361 = icmp eq i8 0, 0
+  %4 = select i1 %3, i1 %.not14.2361, i1 false
+  %add.10961.i.2.2 = select reassoc nsz contract i1 %4, float 0.000000e+00, float 0x7FF8000000000000
+  %broadcast.splatinsert2024 = insertelement <8 x float> poison, float %add.10961.i.2.2, i32 0
+  %broadcast.splat2025 = shufflevector <8 x float> %broadcast.splatinsert2024, <8 x float> poison, <8 x i32> zeroinitializer
+  %broadcast.splatinsert2049 = insertelement <8 x i8> poison, i8 %1, i32 0
+  %broadcast.splat2050 = shufflevector <8 x i8> %broadcast.splatinsert2049, <8 x i8> poison, <8 x i32> zeroinitializer
+  br label %vector.body1999
+
+vector.body1999:                                  ; preds = %vector.body1999, %while.84.body.preheader
+  %predphi2026 = select <8 x i1> undef, <8 x float> undef, <8 x float> %broadcast.splat2025
+  %predphi2051 = select <8 x i1> undef, <8 x i8> %broadcast.splat2050, <8 x i8> undef
+  br label %vector.body1999
+}


        


More information about the llvm-commits mailing list