[llvm] 51f9310 - [Scalarizer] ExtractElement handling w/ variable insert index (PR46524)
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 6 03:20:13 PDT 2020
Author: Roman Lebedev
Date: 2020-07-06T13:19:33+03:00
New Revision: 51f9310ff2e3a615e43b87acc84dab0400b5854e
URL: https://github.com/llvm/llvm-project/commit/51f9310ff2e3a615e43b87acc84dab0400b5854e
DIFF: https://github.com/llvm/llvm-project/commit/51f9310ff2e3a615e43b87acc84dab0400b5854e.diff
LOG: [Scalarizer] ExtractElement handling w/ variable insert index (PR46524)
Summary:
Similar to D82961.
Reviewers: bjope, cameron.mcinally, arsenm, jdoerfert
Reviewed By: jdoerfert
Subscribers: arphaman, wdng, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D82970
Added:
Modified:
llvm/lib/Transforms/Scalar/Scalarizer.cpp
llvm/test/Transforms/Scalarizer/variable-extractelement.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 0327d3932135..a775be6ef7b8 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -785,6 +785,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
if (!VT)
return false;
+ unsigned NumSrcElems = VT->getNumElements();
IRBuilder<> Builder(&EEI);
Scatterer Op0 = scatter(&EEI, EEI.getOperand(0));
Value *ExtIdx = EEI.getOperand(1);
@@ -795,7 +796,18 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
return true;
}
- return false;
+ if (!ScalarizeVariableInsertExtract)
+ return false;
+
+ Value *Res = UndefValue::get(VT->getElementType());
+ for (unsigned I = 0; I < NumSrcElems; ++I) {
+ Res = Builder.CreateSelect(
+ Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
+ ExtIdx->getName() + ".is." + Twine(I)),
+ Op0[I], Res, EEI.getName() + ".upto" + Twine(I));
+ }
+ gather(&EEI, {Res});
+ return true;
}
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
diff --git a/llvm/test/Transforms/Scalarizer/variable-extractelement.ll b/llvm/test/Transforms/Scalarizer/variable-extractelement.ll
index 2f1c24878de0..50666562af32 100644
--- a/llvm/test/Transforms/Scalarizer/variable-extractelement.ll
+++ b/llvm/test/Transforms/Scalarizer/variable-extractelement.ll
@@ -1,38 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck --check-prefixes=ALL %s
+; RUN: opt %s -scalarizer -dce -S | FileCheck --check-prefixes=ALL,DEFAULT %s
+; RUN: opt %s -scalarizer -scalarize-variable-insert-extract=false -dce -S | FileCheck --check-prefixes=ALL,OFF %s
+; RUN: opt %s -scalarizer -scalarize-variable-insert-extract=true -dce -S | FileCheck --check-prefixes=ALL,DEFAULT,ON %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Test that variable extracts scalarized.
define i32 @f1(<4 x i32> %src, i32 %index) {
-; ALL-LABEL: @f1(
-; ALL-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i32 [[INDEX:%.*]]
-; ALL-NEXT: ret i32 [[RES]]
+; DEFAULT-LABEL: @f1(
+; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; DEFAULT-NEXT: [[SRC_I0:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i32 0
+; DEFAULT-NEXT: [[RES_UPTO0:%.*]] = select i1 [[INDEX_IS_0]], i32 [[SRC_I0]], i32 undef
+; DEFAULT-NEXT: [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1
+; DEFAULT-NEXT: [[SRC_I1:%.*]] = extractelement <4 x i32> [[SRC]], i32 1
+; DEFAULT-NEXT: [[RES_UPTO1:%.*]] = select i1 [[INDEX_IS_1]], i32 [[SRC_I1]], i32 [[RES_UPTO0]]
+; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2
+; DEFAULT-NEXT: [[SRC_I2:%.*]] = extractelement <4 x i32> [[SRC]], i32 2
+; DEFAULT-NEXT: [[RES_UPTO2:%.*]] = select i1 [[INDEX_IS_2]], i32 [[SRC_I2]], i32 [[RES_UPTO1]]
+; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3
+; DEFAULT-NEXT: [[SRC_I3:%.*]] = extractelement <4 x i32> [[SRC]], i32 3
+; DEFAULT-NEXT: [[RES:%.*]] = select i1 [[INDEX_IS_3]], i32 [[SRC_I3]], i32 [[RES_UPTO2]]
+; DEFAULT-NEXT: ret i32 [[RES]]
+;
+; OFF-LABEL: @f1(
+; OFF-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i32 [[INDEX:%.*]]
+; OFF-NEXT: ret i32 [[RES]]
;
%res = extractelement <4 x i32> %src, i32 %index
ret i32 %res
}
define i32 @f2(<4 x i32> *%src, i32 %index) {
-; ALL-LABEL: @f2(
-; ALL-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
-; ALL-NEXT: [[VAL0_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16
-; ALL-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
-; ALL-NEXT: [[VAL0_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
-; ALL-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
-; ALL-NEXT: [[VAL0_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
-; ALL-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
-; ALL-NEXT: [[VAL0_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
-; ALL-NEXT: [[VAL1_I0:%.*]] = shl i32 1, [[VAL0_I0]]
-; ALL-NEXT: [[VAL1_I1:%.*]] = shl i32 2, [[VAL0_I1]]
-; ALL-NEXT: [[VAL1_I2:%.*]] = shl i32 3, [[VAL0_I2]]
-; ALL-NEXT: [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]]
-; ALL-NEXT: [[VAL1_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL1_I0]], i32 0
-; ALL-NEXT: [[VAL1_UPTO1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO0]], i32 [[VAL1_I1]], i32 1
-; ALL-NEXT: [[VAL1_UPTO2:%.*]] = insertelement <4 x i32> [[VAL1_UPTO1]], i32 [[VAL1_I2]], i32 2
-; ALL-NEXT: [[VAL1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO2]], i32 [[VAL1_I3]], i32 3
-; ALL-NEXT: [[VAL2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 [[INDEX:%.*]]
-; ALL-NEXT: ret i32 [[VAL2]]
+; DEFAULT-LABEL: @f2(
+; DEFAULT-NEXT: [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16
+; DEFAULT-NEXT: [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
+; DEFAULT-NEXT: [[VAL1_I0:%.*]] = shl i32 1, [[VAL0_I0]]
+; DEFAULT-NEXT: [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
+; DEFAULT-NEXT: [[VAL1_I1:%.*]] = shl i32 2, [[VAL0_I1]]
+; DEFAULT-NEXT: [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
+; DEFAULT-NEXT: [[VAL1_I2:%.*]] = shl i32 3, [[VAL0_I2]]
+; DEFAULT-NEXT: [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i32 3
+; DEFAULT-NEXT: [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]]
+; DEFAULT-NEXT: [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; DEFAULT-NEXT: [[VAL2_UPTO0:%.*]] = select i1 [[INDEX_IS_0]], i32 [[VAL1_I0]], i32 undef
+; DEFAULT-NEXT: [[INDEX_IS_1:%.*]] = icmp eq i32 [[INDEX]], 1
+; DEFAULT-NEXT: [[VAL2_UPTO1:%.*]] = select i1 [[INDEX_IS_1]], i32 [[VAL1_I1]], i32 [[VAL2_UPTO0]]
+; DEFAULT-NEXT: [[INDEX_IS_2:%.*]] = icmp eq i32 [[INDEX]], 2
+; DEFAULT-NEXT: [[VAL2_UPTO2:%.*]] = select i1 [[INDEX_IS_2]], i32 [[VAL1_I2]], i32 [[VAL2_UPTO1]]
+; DEFAULT-NEXT: [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3
+; DEFAULT-NEXT: [[VAL2:%.*]] = select i1 [[INDEX_IS_3]], i32 [[VAL1_I3]], i32 [[VAL2_UPTO2]]
+; DEFAULT-NEXT: ret i32 [[VAL2]]
+;
+; OFF-LABEL: @f2(
+; OFF-NEXT: [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16
+; OFF-NEXT: [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
+; OFF-NEXT: [[VAL1_I0:%.*]] = shl i32 1, [[VAL0_I0]]
+; OFF-NEXT: [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
+; OFF-NEXT: [[VAL1_I1:%.*]] = shl i32 2, [[VAL0_I1]]
+; OFF-NEXT: [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
+; OFF-NEXT: [[VAL1_I2:%.*]] = shl i32 3, [[VAL0_I2]]
+; OFF-NEXT: [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i32 3
+; OFF-NEXT: [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]]
+; OFF-NEXT: [[VAL1_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL1_I0]], i32 0
+; OFF-NEXT: [[VAL1_UPTO1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO0]], i32 [[VAL1_I1]], i32 1
+; OFF-NEXT: [[VAL1_UPTO2:%.*]] = insertelement <4 x i32> [[VAL1_UPTO1]], i32 [[VAL1_I2]], i32 2
+; OFF-NEXT: [[VAL1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO2]], i32 [[VAL1_I3]], i32 3
+; OFF-NEXT: [[VAL2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 [[INDEX:%.*]]
+; OFF-NEXT: ret i32 [[VAL2]]
;
%val0 = load <4 x i32> , <4 x i32> *%src
%val1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val0
More information about the llvm-commits mailing list