[PATCH] D79987: [DAG] SimplifyDemandedVectorElts Bug fix for rG7cb5a51f386d

Bing Yu via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri May 15 10:18:33 PDT 2020


yubing updated this revision to Diff 264269.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79987/new/

https://reviews.llvm.org/D79987

Files:
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/test/CodeGen/X86/simplifydemandedvectorselts-broadcast.ll


Index: llvm/test/CodeGen/X86/simplifydemandedvectorselts-broadcast.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/simplifydemandedvectorselts-broadcast.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 | FileCheck %s
+
+; Function Attrs: noinline nounwind optnone uwtable
+define  <16 x i32> @main(<3 x i32>* %ptr) {
+; CHECK-LABEL: main:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    vpinsrd $2, 8(%rdi), %xmm0, %xmm1
+; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7]
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    retq
+entry:
+  %int3 = load <3 x i32>, <3 x i32>* %ptr, align 1
+  %0 = shufflevector <3 x i32> %int3, <3 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %1 = shufflevector <16 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0>, <16 x i32> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i32 > %1
+}
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37007,6 +37007,22 @@
           SrcElts.setBit(M);
       }
 
+    // If OpInputs[Src] has users other than Op.getNode(), conservatively
+    // assume that all of its elements are demanded, i.e. SrcElts.setAllBits().
+    // For example:
+    // t1317: v8i32 = insert_subvector undef:v8i32, t1414, Constant:i64<0>
+    // t1315: v8i32 = X86ISD::BLENDI t380, t1317, TargetConstant:i8<2>
+    // t1414: v4i32 = insert_vector_elt t679, t677, Constant:i64<2>
+    // t1416: v8i32 = X86ISD::VBROADCAST t1414
+    // When getTargetShuffleInputs(...) processes t1416, it creates
+    //  NewNode: v8i32 = insert_subvector undef:v8i32, t1414, Constant:i64<0>
+    //  which is identical to the existing node t1317.
+    // So getTargetShuffleInputs(...) sets
+    //  OpInputs[0] = t1317, which is also used by t1315.
+    // Because t1315 also uses OpInputs[0], we must assume that all of its
+    // elements are demanded before simplifying it, i.e. SrcElts.setAllBits().
+    if (!OpInputs[Src].isOperandOf(Op.getNode()) && !OpInputs[Src].use_empty())
+      SrcElts.setAllBits();
     // TODO - Propagate input undef/zero elts.
     APInt SrcUndef, SrcZero;
     if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D79987.264269.patch
Type: text/x-patch
Size: 3030 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200515/c7f0a3a5/attachment.bin>


More information about the llvm-commits mailing list