[llvm] 28b7816 - [Scalarizer] ExtractElement handling w/ constant extract index
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 6 03:20:09 PDT 2020
Author: Roman Lebedev
Date: 2020-07-06T13:19:32+03:00
New Revision: 28b7816b782bdeca509218b53edfbca6512c33d5
URL: https://github.com/llvm/llvm-project/commit/28b7816b782bdeca509218b53edfbca6512c33d5
DIFF: https://github.com/llvm/llvm-project/commit/28b7816b782bdeca509218b53edfbca6512c33d5.diff
LOG: [Scalarizer] ExtractElement handling w/ constant extract index
Summary:
It appears to produce better IR to aggressively scalarize an extractelement
with a constant index, rather than leaving it as-is and relying on gathering
its vector operand.
Reviewers: jdoerfert, bjope, arsenm, cameron.mcinally
Reviewed By: jdoerfert
Subscribers: arphaman, wdng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83101
Added:
Modified:
llvm/lib/Transforms/Scalar/Scalarizer.cpp
llvm/test/Transforms/Scalarizer/constant-extractelement.ll
llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 6802a9101882..5cc4d795d767 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -193,6 +193,7 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
bool visitCastInst(CastInst &CI);
bool visitBitCastInst(BitCastInst &BCI);
bool visitInsertElementInst(InsertElementInst &IEI);
+ bool visitExtractElementInst(ExtractElementInst &EEI);
bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
bool visitPHINode(PHINode &PHI);
bool visitLoadInst(LoadInst &LI);
@@ -766,6 +767,24 @@ bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
return true;
}
+bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { // Handles an extractelement whose index operand is a ConstantInt; returns false otherwise.
+ VectorType *VT = dyn_cast<VectorType>(EEI.getOperand(0)->getType());
+ if (!VT)
+ return false; // Operand 0 is not a VectorType: nothing is done.
+ // NOTE(review): Builder below is constructed but never referenced in this function.
+ IRBuilder<> Builder(&EEI);
+ Scatterer Op0 = scatter(&EEI, EEI.getOperand(0)); // Presumably yields per-element access to operand 0 — confirm against scatter().
+ Value *ExtIdx = EEI.getOperand(1);
+ // Only a ConstantInt index is handled; any other index falls through to `return false`.
+ if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
+ Value *Res = Op0[CI->getValue().getZExtValue()]; // NOTE(review): the index is not range-checked here — confirm Scatterer tolerates an out-of-range constant.
+ gather(&EEI, {Res}); // Passes a single-element list; presumably records Res as the replacement for EEI — verify against gather().
+ return true;
+ }
+ // Index is not a ConstantInt: the instruction is left untouched.
+ return false;
+}
+
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
VectorType *VT = dyn_cast<VectorType>(SVI.getType());
if (!VT)
@@ -885,16 +904,20 @@ bool ScalarizerVisitor::finish() {
if (!Op->use_empty()) {
// The value is still needed, so recreate it using a series of
// InsertElements.
- auto *Ty = cast<VectorType>(Op->getType());
- Value *Res = UndefValue::get(Ty);
- BasicBlock *BB = Op->getParent();
- unsigned Count = Ty->getNumElements();
- IRBuilder<> Builder(Op);
- if (isa<PHINode>(Op))
- Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
- for (unsigned I = 0; I < Count; ++I)
- Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
- Op->getName() + ".upto" + Twine(I));
+ Value *Res = UndefValue::get(Op->getType());
+ if (auto *Ty = dyn_cast<VectorType>(Op->getType())) {
+ BasicBlock *BB = Op->getParent();
+ unsigned Count = Ty->getNumElements();
+ IRBuilder<> Builder(Op);
+ if (isa<PHINode>(Op))
+ Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
+ for (unsigned I = 0; I < Count; ++I)
+ Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
+ Op->getName() + ".upto" + Twine(I));
+ } else {
+ assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
+ Res = CV[0];
+ }
Res->takeName(Op);
Op->replaceAllUsesWith(Res);
}
diff --git a/llvm/test/Transforms/Scalarizer/constant-extractelement.ll b/llvm/test/Transforms/Scalarizer/constant-extractelement.ll
index e5d935d186b7..f5bb2edac4e6 100644
--- a/llvm/test/Transforms/Scalarizer/constant-extractelement.ll
+++ b/llvm/test/Transforms/Scalarizer/constant-extractelement.ll
@@ -7,22 +7,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define i32 @f1(<4 x i32> *%src, i32 %index) {
; ALL-LABEL: @f1(
; ALL-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
-; ALL-NEXT: [[VAL0_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16
-; ALL-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
-; ALL-NEXT: [[VAL0_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
-; ALL-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
-; ALL-NEXT: [[VAL0_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
; ALL-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
; ALL-NEXT: [[VAL0_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
-; ALL-NEXT: [[VAL1_I0:%.*]] = shl i32 1, [[VAL0_I0]]
-; ALL-NEXT: [[VAL1_I1:%.*]] = shl i32 2, [[VAL0_I1]]
-; ALL-NEXT: [[VAL1_I2:%.*]] = shl i32 3, [[VAL0_I2]]
-; ALL-NEXT: [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]]
-; ALL-NEXT: [[VAL1_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL1_I0]], i32 0
-; ALL-NEXT: [[VAL1_UPTO1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO0]], i32 [[VAL1_I1]], i32 1
-; ALL-NEXT: [[VAL1_UPTO2:%.*]] = insertelement <4 x i32> [[VAL1_UPTO1]], i32 [[VAL1_I2]], i32 2
-; ALL-NEXT: [[VAL1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO2]], i32 [[VAL1_I3]], i32 3
-; ALL-NEXT: [[VAL2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 3
+; ALL-NEXT: [[VAL2:%.*]] = shl i32 4, [[VAL0_I3]]
; ALL-NEXT: ret i32 [[VAL2]]
;
%val0 = load <4 x i32> , <4 x i32> *%src
diff --git a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll
index 1de1f6509666..8e89efb5d31f 100644
--- a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll
+++ b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll
@@ -15,12 +15,7 @@ define i16 @f1() {
; CHECK-NEXT: [[PHI_I1:%.*]] = phi i16 [ 1, [[ENTRY]] ], [ undef, [[FOR_COND]] ]
; CHECK-NEXT: [[PHI_I2:%.*]] = phi i16 [ 1, [[ENTRY]] ], [ undef, [[FOR_COND]] ]
; CHECK-NEXT: [[PHI_I3:%.*]] = phi i16 [ 1, [[ENTRY]] ], [ undef, [[FOR_COND]] ]
-; CHECK-NEXT: [[PHI_UPTO0:%.*]] = insertelement <4 x i16> undef, i16 [[PHI_I0]], i32 0
-; CHECK-NEXT: [[PHI_UPTO1:%.*]] = insertelement <4 x i16> [[PHI_UPTO0]], i16 [[PHI_I1]], i32 1
-; CHECK-NEXT: [[PHI_UPTO2:%.*]] = insertelement <4 x i16> [[PHI_UPTO1]], i16 [[PHI_I2]], i32 2
-; CHECK-NEXT: [[PHI:%.*]] = insertelement <4 x i16> [[PHI_UPTO2]], i16 [[PHI_I3]], i32 3
-; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x i16> [[PHI]], i32 0
-; CHECK-NEXT: ret i16 [[EXTRACT]]
+; CHECK-NEXT: ret i16 [[PHI_I0]]
;
entry:
br label %for.end
More information about the llvm-commits mailing list