[llvm] r311221 - [SLPVectorizer] Add opcode parameter to reorderAltShuffleOperands, reorderInputsAccordingToOpcode functions.
Dinar Temirbulatov via llvm-commits
llvm-commits@lists.llvm.org
Fri Aug 18 19:54:20 PDT 2017
Author: dinar
Date: Fri Aug 18 19:54:20 2017
New Revision: 311221
URL: http://llvm.org/viewvc/llvm-project?rev=311221&view=rev
Log:
[SLPVectorizer] Add opcode parameter to reorderAltShuffleOperands, reorderInputsAccordingToOpcode functions.
Reviewers: mkuper, RKSimon, ABataev, mzolotukhin, spatel, filcab
Subscribers: llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D36766
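
[Editorial note: for context, the assertion this patch adds to reorderAltShuffleOperands
leans on the alternate-opcode helpers already present in SLPVectorizer.cpp, which pair
an opcode with its add/sub counterpart. A minimal sketch of their semantics follows —
a paraphrase for illustration, not necessarily the exact upstream code:

    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Map an opcode to its "alternate" in an alt-shuffle bundle:
    // Add <-> Sub and FAdd <-> FSub; 0 means no alternate exists.
    static unsigned getAltOpcode(unsigned Op) {
      switch (Op) {
      case Instruction::FAdd: return Instruction::FSub;
      case Instruction::FSub: return Instruction::FAdd;
      case Instruction::Add:  return Instruction::Sub;
      case Instruction::Sub:  return Instruction::Add;
      default:                return 0;
      }
    }

    // An instruction belongs to the bundle if it has either the main
    // opcode or its alternate.
    static bool sameOpcodeOrAlt(unsigned Opcode, unsigned AltOpcode,
                                unsigned CheckedOpcode) {
      return Opcode == CheckedOpcode || AltOpcode == CheckedOpcode;
    }

Passing the bundle's opcode down as a parameter lets the reorder helpers assert this
membership instead of re-deriving the opcode from VL[0].]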
Added:
llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=311221&r1=311220&r2=311221&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Aug 18 19:54:20 2017
@@ -596,12 +596,12 @@ private:
/// \reorder commutative operands in alt shuffle if they result in
/// vectorized code.
- void reorderAltShuffleOperands(ArrayRef<Value *> VL,
+ void reorderAltShuffleOperands(unsigned Opcode, ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right);
/// \reorder commutative operands to get better probability of
/// generating vectorized code.
- void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ void reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right);
struct TreeEntry {
@@ -1635,7 +1635,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
// have the same opcode.
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right);
+ reorderInputsAccordingToOpcode(VL0->getOpcode(), VL, Left, Right);
buildTree_rec(Left, Depth + 1, UserTreeIdx);
buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
@@ -1799,7 +1799,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
- reorderAltShuffleOperands(VL, Left, Right);
+ reorderAltShuffleOperands(VL0->getOpcode(), VL, Left, Right);
buildTree_rec(Left, Depth + 1, UserTreeIdx);
buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
@@ -2344,13 +2344,17 @@ int BoUpSLP::getGatherCost(ArrayRef<Valu
// load a[3] + load b[3]
// Reordering the second load b[1] load a[1] would allow us to vectorize this
// code.
-void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
+void BoUpSLP::reorderAltShuffleOperands(unsigned Opcode, ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right) {
// Push left and right operands of binary operation into Left and Right
- for (Value *i : VL) {
- Left.push_back(cast<Instruction>(i)->getOperand(0));
- Right.push_back(cast<Instruction>(i)->getOperand(1));
+ unsigned AltOpcode = getAltOpcode(Opcode);
+ for (Value *V : VL) {
+ auto *I = cast<Instruction>(V);
+ assert(sameOpcodeOrAlt(Opcode, AltOpcode, I->getOpcode()) &&
+ "Incorrect instruction in vector");
+ Left.push_back(I->getOperand(0));
+ Right.push_back(I->getOperand(1));
}
// Reorder if we have a commutative operation and consecutive access
@@ -2395,14 +2399,12 @@ void BoUpSLP::reorderAltShuffleOperands(
// The vectorizer is trying to either have all elements one side being
// instruction with the same opcode to enable further vectorization, or having
// a splat to lower the vectorizing cost.
-static bool shouldReorderOperands(int i, Instruction &I,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right,
- bool AllSameOpcodeLeft,
- bool AllSameOpcodeRight, bool SplatLeft,
- bool SplatRight) {
- Value *VLeft = I.getOperand(0);
- Value *VRight = I.getOperand(1);
+static bool shouldReorderOperands(
+ int i, unsigned Opcode, Instruction &I, ArrayRef<Value *> Left,
+ ArrayRef<Value *> Right, bool AllSameOpcodeLeft, bool AllSameOpcodeRight,
+ bool SplatLeft, bool SplatRight, Value *&VLeft, Value *&VRight) {
+ VLeft = I.getOperand(0);
+ VRight = I.getOperand(1);
// If we have "SplatRight", try to see if commuting is needed to preserve it.
if (SplatRight) {
if (VRight == Right[i - 1])
@@ -2458,15 +2460,19 @@ static bool shouldReorderOperands(int i,
return false;
}
-void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
+ ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right) {
if (VL.size()) {
// Peel the first iteration out of the loop since there's nothing
// interesting to do anyway and it simplifies the checks in the loop.
- auto VLeft = cast<Instruction>(VL[0])->getOperand(0);
- auto VRight = cast<Instruction>(VL[0])->getOperand(1);
+ auto *I = cast<Instruction>(VL[0]);
+ Value *VLeft;
+ Value *VRight;
+ VLeft = I->getOperand(0);
+ VRight = I->getOperand(1);
if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
// Favor having instruction to the right. FIXME: why?
std::swap(VLeft, VRight);
@@ -2483,16 +2489,21 @@ void BoUpSLP::reorderInputsAccordingToOp
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
Instruction *I = cast<Instruction>(VL[i]);
- assert(I->isCommutative() && "Can only process commutative instruction");
+ assert(((I->getOpcode() == Opcode && I->isCommutative()) ||
+ (I->getOpcode() != Opcode && Instruction::isCommutative(Opcode))) &&
+ "Can only process commutative instruction");
// Commute to favor either a splat or maximizing having the same opcodes on
// one side.
- if (shouldReorderOperands(i, *I, Left, Right, AllSameOpcodeLeft,
- AllSameOpcodeRight, SplatLeft, SplatRight)) {
- Left.push_back(I->getOperand(1));
- Right.push_back(I->getOperand(0));
+ Value *VLeft;
+ Value *VRight;
+ if (shouldReorderOperands(i, Opcode, *I, Left, Right, AllSameOpcodeLeft,
+ AllSameOpcodeRight, SplatLeft, SplatRight, VLeft,
+ VRight)) {
+ Left.push_back(VRight);
+ Right.push_back(VLeft);
} else {
- Left.push_back(I->getOperand(0));
- Right.push_back(I->getOperand(1));
+ Left.push_back(VLeft);
+ Right.push_back(VRight);
}
// Update Splat* and AllSameOpcode* after the insertion.
SplatRight = SplatRight && (Right[i - 1] == Right[i]);
@@ -2843,11 +2854,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
- reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
+ reorderInputsAccordingToOpcode(VL0->getOpcode(),
+ E->Scalars, LHSVL, RHSVL);
else
for (Value *V : E->Scalars) {
- LHSVL.push_back(cast<Instruction>(V)->getOperand(0));
- RHSVL.push_back(cast<Instruction>(V)->getOperand(1));
+ auto *I = cast<Instruction>(V);
+ LHSVL.push_back(I->getOperand(0));
+ RHSVL.push_back(I->getOperand(1));
}
setInsertPointAfterBundle(E->Scalars, VL0);
@@ -3011,7 +3024,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
case Instruction::ShuffleVector: {
ValueList LHSVL, RHSVL;
assert(isa<BinaryOperator>(VL0) && "Invalid Shuffle Vector Operand");
- reorderAltShuffleOperands(E->Scalars, LHSVL, RHSVL);
+ reorderAltShuffleOperands(VL0->getOpcode(), E->Scalars, LHSVL, RHSVL);
setInsertPointAfterBundle(E->Scalars, VL0);
Value *LHS = vectorizeTree(LHSVL);
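
[Editorial note: for readers skimming the hunks above, the heuristic that
shouldReorderOperands implements — commute individual lanes so that one side stays a
splat or keeps uniform opcodes — can be summarized with a small standalone sketch.
The types and names below are hypothetical; this paraphrases the intent, not the
upstream implementation:

    #include <string>
    #include <utility>
    #include <vector>

    // Toy stand-in for an operand: Id models Value* identity (equal Ids
    // mean the same value, as needed for splat detection) and Opcode names
    // the defining instruction's opcode, or "" for non-instructions.
    struct Operand {
      int Id;
      std::string Opcode;
    };

    // One greedy pass over the operand pairs of a bundle of commutative
    // instructions: commute lane i whenever that preserves a splat on one
    // side or keeps that side's opcodes uniform.
    static void reorderCommutative(std::vector<std::pair<Operand, Operand>> &Ops) {
      bool SplatLeft = true, SplatRight = true;
      bool SameOpLeft = true, SameOpRight = true;
      for (size_t i = 1; i < Ops.size(); ++i) {
        const Operand &PrevL = Ops[i - 1].first;
        const Operand &PrevR = Ops[i - 1].second;
        const Operand &L = Ops[i].first;
        const Operand &R = Ops[i].second;
        bool Swap =
            (SplatRight && L.Id == PrevR.Id && R.Id != PrevR.Id) ||
            (SplatLeft && R.Id == PrevL.Id && L.Id != PrevL.Id) ||
            (SameOpRight && L.Opcode == PrevR.Opcode && R.Opcode != PrevR.Opcode);
        if (Swap)
          std::swap(Ops[i].first, Ops[i].second);
        // Update the running invariants after the (possible) commute.
        SplatLeft = SplatLeft && Ops[i].first.Id == PrevL.Id;
        SplatRight = SplatRight && Ops[i].second.Id == PrevR.Id;
        SameOpLeft = SameOpLeft && Ops[i].first.Opcode == PrevL.Opcode;
        SameOpRight = SameOpRight && Ops[i].second.Opcode == PrevR.Opcode;
      }
    }

The real code additionally prefers loads with consecutive addresses; that refinement
is omitted here for brevity.]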
Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder.ll?rev=311221&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reorder.ll Fri Aug 18 19:54:20 2017
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mcpu=bdver1 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common local_unnamed_addr global i32 0, align 4
+@c = common local_unnamed_addr global [1 x i32] zeroinitializer, align 4
+
+define i32 @foo() local_unnamed_addr #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> <i32 8, i32 1, i32 2, i32 3>, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
+; CHECK-NEXT: store i32 [[TMP6]], i32* getelementptr inbounds ([1 x i32], [1 x i32]* @c, i64 1, i64 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
+; CHECK-NEXT: store i32 [[TMP7]], i32* getelementptr ([1 x i32], [1 x i32]* @c, i64 2, i64 0), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP5]], i32 0
+; CHECK-NEXT: store i32 [[TMP8]], i32* getelementptr inbounds ([1 x i32], [1 x i32]* @c, i64 0, i64 0), align 4
+; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr ([1 x i32], [1 x i32]* @c, i64 7, i64 0) to <4 x i32>*), align 4
+; CHECK-NEXT: ret i32 undef
+;
+entry:
+ %0 = load i32, i32* @a, align 4
+ %add = add nsw i32 %0, 1
+ %add1 = add nsw i32 %0, 2
+ %add6 = add nsw i32 %0, 3
+ %add11 = add nsw i32 %0, 8
+ store i32 %add1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @c, i64 1, i64 0), align 4
+ store i32 %add6, i32* getelementptr ([1 x i32], [1 x i32]* @c, i64 2, i64 0), align 4
+ store i32 %add11, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @c, i64 0, i64 0), align 4
+ store i32 %add, i32* getelementptr ([1 x i32], [1 x i32]* @c, i64 8, i64 0), align 4
+ store i32 %add1, i32* getelementptr ([1 x i32], [1 x i32]* @c, i64 9, i64 0), align 4
+ store i32 %add6, i32* getelementptr ([1 x i32], [1 x i32]* @c, i64 10, i64 0), align 4
+ store i32 %add11, i32* getelementptr ([1 x i32], [1 x i32]* @c, i64 7, i64 0), align 4
+ ret i32 undef
+}
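
[Editorial note on the new test: in the scalar input, the four consecutive stores to
c[7]..c[10] receive the add results in a different order (%add11, %add, %add1, %add6)
than the order in which they are defined, and the CHECK lines show the bundle is
nonetheless merged into the single <4 x i32> store of [[TMP5]] with constant operands
<8, 1, 2, 3> — the operand-reordering path this patch touches.]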