[PATCH] D94056: [CodeGen] Update transformations to use poison for shufflevector/insertelem's initial vector elem

Juneyoung Lee via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 4 19:39:41 PST 2021


aqjune created this revision.
aqjune added reviewers: fhahn, spatel, nikic, dmgreen.
Herald added a subscriber: hiraditya.
aqjune requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

This patch is a part of D93817 <https://reviews.llvm.org/D93817> and makes transformations in CodeGen use poison for shufflevector/insertelem's initial vector element.

The change in CodeGenPrepare.cpp is fine because the shufflevector mask is always all-zero (the pattern is matched with m_ZeroMask).
The shuffle therefore only reads element 0 and never touches the other elements (which are poison).

The change in InterleavedAccessPass.cpp is also fine because the mask is of the form <a, a+m, a+2m, .., a+km>, where a+km is smaller than
the size of the first vector operand.
This is guaranteed by the caller of replaceBinOpShuffles, which is lowerInterleavedLoad.
It calls isDeInterleaveMask and isDeInterleaveMaskOfFactor to check that the mask has the desired form.
isDeInterleaveMask has the check that a+km is smaller than the vector size.
To check my understanding, I added an assertion and a test showing that this optimization doesn't fire in such a case.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D94056

Files:
  llvm/lib/CodeGen/CodeGenPrepare.cpp
  llvm/lib/CodeGen/InterleavedAccessPass.cpp
  llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll


Index: llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll
===================================================================
--- llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll
+++ llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll
@@ -10,8 +10,8 @@
 ; CHECK-LABEL: @shuffle_binop_fol(
 ; CHECK-NEXT:  vector.body.preheader:
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[EXTRACTED2:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[EXTRACTED2:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> poison, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]]
 ; CHECK-NEXT:    ret <2 x double> [[FADD3]]
 ;
@@ -22,6 +22,21 @@
   ret <2 x double> %extracted
 }
 
+define <2 x double> @shuffle_binop_fol_oob(<4 x double>* %ptr) {
+; CHECK-LABEL: @shuffle_binop_fol_oob(
+; CHECK-NEXT:  vector.body.preheader:
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[FADD:%.*]] = fadd <4 x double> [[WIDE_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = shufflevector <4 x double> [[FADD]], <4 x double> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-NEXT:    ret <2 x double> [[EXTRACTED]]
+;
+vector.body.preheader:
+  %wide.load = load <4 x double>, <4 x double>* %ptr, align 8
+  %fadd = fadd <4 x double> %wide.load, <double 1.0, double 1.0, double 1.0, double 1.0>
+  %extracted = shufflevector <4 x double> %fadd, <4 x double> undef, <2 x i32> <i32 0, i32 4>
+  ret <2 x double> %extracted
+}
+
 ; No interleaved load instruction is generated, but the extractelement
 ; instructions are updated to use the shuffle instead of the load.
 define void @shuffle_extract(<4 x double>* %ptr, i1 %c) {
Index: llvm/lib/CodeGen/InterleavedAccessPass.cpp
===================================================================
--- llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -22,8 +22,8 @@
 //
 // E.g. An interleaved load (Factor = 2):
 //        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
-//        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
-//        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
+//        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
+//        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
 //
 // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
 // intrinsic in ARM backend.
@@ -395,12 +395,16 @@
   for (auto *SVI : BinOpShuffles) {
     BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
     ArrayRef<int> Mask = SVI->getShuffleMask();
+    assert(Mask.back() < 0 ||
+           (unsigned)Mask.back() <
+               cast<FixedVectorType>(BI->getOperand(0)->getType())
+                   ->getNumElements());
 
     auto *NewSVI1 = new ShuffleVectorInst(
-        BI->getOperand(0), UndefValue::get(BI->getOperand(0)->getType()), Mask,
+        BI->getOperand(0), PoisonValue::get(BI->getOperand(0)->getType()), Mask,
         SVI->getName(), SVI);
     auto *NewSVI2 = new ShuffleVectorInst(
-        BI->getOperand(1), UndefValue::get(BI->getOperand(1)->getType()), Mask,
+        BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
         SVI->getName(), SVI);
     Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
                                           BI->getName(), SVI);
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6700,6 +6700,7 @@
 /// in MVE takes a GPR (integer) register, and the instruction that incorporate
 /// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
 bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+  // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
   if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
     return false;
@@ -6719,7 +6720,7 @@
   Builder.SetInsertPoint(SVI);
   Value *BC1 = Builder.CreateBitCast(
       cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
-  Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1,
+  Value *Insert = Builder.CreateInsertElement(PoisonValue::get(NewVecType), BC1,
                                               (uint64_t)0);
   Value *Shuffle = Builder.CreateShuffleVector(Insert, SVI->getShuffleMask());
   Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D94056.314492.patch
Type: text/x-patch
Size: 5347 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210105/71324944/attachment.bin>


More information about the llvm-commits mailing list