[llvm] aaf1630 - [Scalarizer] No need to gather a scattered extracted element

via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 21 09:46:39 PDT 2022


Author: serge-sans-paille
Date: 2022-06-21T18:43:54+02:00
New Revision: aaf1630ac38c890f1476fe9ee002f5fe2f5cc188

URL: https://github.com/llvm/llvm-project/commit/aaf1630ac38c890f1476fe9ee002f5fe2f5cc188
DIFF: https://github.com/llvm/llvm-project/commit/aaf1630ac38c890f1476fe9ee002f5fe2f5cc188.diff

LOG: [Scalarizer] No need to gather a scattered extracted element

ExtractElement does not produce a vector out of a vector, so there's no need to
call a gather once done.

Fix #54469

Credits to npopov at redhat.com for the original approach.

Differential Revision: https://reviews.llvm.org/D126012

Added: 
    llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll

Modified: 
    llvm/lib/Transforms/Scalar/Scalarizer.cpp
    llvm/test/Transforms/Scalarizer/global-bug.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index a7672a91de3a7..8a5c988a514ed 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -229,6 +229,7 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
 private:
   Scatterer scatter(Instruction *Point, Value *V, Type *PtrElemTy = nullptr);
   void gather(Instruction *Op, const ValueVector &CV);
+  void replaceUses(Instruction *Op, Value *CV);
   bool canTransferMetadata(unsigned Kind);
   void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
   Optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
@@ -242,6 +243,7 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
 
   ScatterMap Scattered;
   GatherList Gathered;
+  bool Scalarized;
 
   SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
 
@@ -361,6 +363,8 @@ FunctionPass *llvm::createScalarizerPass() {
 bool ScalarizerVisitor::visit(Function &F) {
   assert(Gathered.empty() && Scattered.empty());
 
+  Scalarized = false;
+
   // To ensure we replace gathered components correctly we need to do an ordered
   // traversal of the basic blocks in the function.
   ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
@@ -436,6 +440,15 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
   Gathered.push_back(GatherList::value_type(Op, &SV));
 }
 
+// Replace Op with CV and collect Op as a potentially dead instruction.
+void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
+  if (CV != Op) {
+    Op->replaceAllUsesWith(CV);
+    PotentiallyDeadInstrs.emplace_back(Op);
+    Scalarized = true;
+  }
+}
+
 // Return true if it is safe to transfer the given metadata tag from
 // vector to scalar instructions.
 bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
@@ -828,7 +841,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
 
   if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
     Value *Res = Op0[CI->getValue().getZExtValue()];
-    gather(&EEI, {Res});
+    replaceUses(&EEI, Res);
     return true;
   }
 
@@ -844,7 +857,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
     Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
                                EEI.getName() + ".upto" + Twine(I));
   }
-  gather(&EEI, {Res});
+  replaceUses(&EEI, Res);
   return true;
 }
 
@@ -959,7 +972,7 @@ bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
 bool ScalarizerVisitor::finish() {
   // The presence of data in Gathered or Scattered indicates changes
   // made to the Function.
-  if (Gathered.empty() && Scattered.empty())
+  if (Gathered.empty() && Scattered.empty() && !Scalarized)
     return false;
   for (const auto &GMI : Gathered) {
     Instruction *Op = GMI.first;
@@ -990,6 +1003,7 @@ bool ScalarizerVisitor::finish() {
   }
   Gathered.clear();
   Scattered.clear();
+  Scalarized = false;
 
   RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
 

diff  --git a/llvm/test/Transforms/Scalarizer/global-bug.ll b/llvm/test/Transforms/Scalarizer/global-bug.ll
index 91d1bd23af988..d3c3b1eeaba23 100644
--- a/llvm/test/Transforms/Scalarizer/global-bug.ll
+++ b/llvm/test/Transforms/Scalarizer/global-bug.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes='function(scalarizer)' -S | FileCheck %s
 
 @a = dso_local global i16 0, align 1

diff  --git a/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll b/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll
new file mode 100644
index 0000000000000..53de328a060d0
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define <1 x i32> @f1(<1 x <1 x i32>*> %src, i32 %index) {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT:    [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <1 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <1 x i32>* [[SRC_I0]], <1 x i32>* undef
+; CHECK-NEXT:    [[DOTUPTO0_I0:%.*]] = bitcast <1 x i32>* [[DOTUPTO0]] to i32*
+; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, i32* [[DOTUPTO0_I0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <1 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT:    ret <1 x i32> [[TMP1]]
+;
+  %1 = extractelement <1 x <1 x i32>*> %src, i32 %index
+  %2 = load <1 x i32>, <1 x i32>* %1, align 4
+  ret <1 x i32> %2
+}
+
+define <1 x i32> @f1b(<1 x <1 x i32>*> %src) {
+; CHECK-LABEL: @f1b(
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <1 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[SRC_I0_I0:%.*]] = bitcast <1 x i32>* [[SRC_I0]] to i32*
+; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, i32* [[SRC_I0_I0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <1 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT:    ret <1 x i32> [[TMP1]]
+;
+  %1 = extractelement <1 x <1 x i32>*> %src, i32 0
+  %2 = load <1 x i32>, <1 x i32>* %1, align 4
+  ret <1 x i32> %2
+}
+
+define <2 x i32> @f2(<1 x <2 x i32>*> %src, i32 %index) {
+; CHECK-LABEL: @f2(
+; CHECK-NEXT:    [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <2 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <2 x i32>* [[SRC_I0]], <2 x i32>* undef
+; CHECK-NEXT:    [[DOTUPTO0_I0:%.*]] = bitcast <2 x i32>* [[DOTUPTO0]] to i32*
+; CHECK-NEXT:    [[DOTUPTO0_I1:%.*]] = getelementptr i32, i32* [[DOTUPTO0_I0]], i32 1
+; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, i32* [[DOTUPTO0_I0]], align 4
+; CHECK-NEXT:    [[DOTI1:%.*]] = load i32, i32* [[DOTUPTO0_I1]], align 4
+; CHECK-NEXT:    [[DOTUPTO01:%.*]] = insertelement <2 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[DOTUPTO01]], i32 [[DOTI1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+  %1 = extractelement <1 x <2 x i32>*> %src, i32 %index
+  %2 = load <2 x i32>, <2 x i32>* %1, align 4
+  ret <2 x i32> %2
+}
+
+define <2 x i32> @f2b(<1 x <2 x i32>*> %src) {
+; CHECK-LABEL: @f2b(
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <2 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[SRC_I0_I0:%.*]] = bitcast <2 x i32>* [[SRC_I0]] to i32*
+; CHECK-NEXT:    [[SRC_I0_I1:%.*]] = getelementptr i32, i32* [[SRC_I0_I0]], i32 1
+; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, i32* [[SRC_I0_I0]], align 4
+; CHECK-NEXT:    [[DOTI1:%.*]] = load i32, i32* [[SRC_I0_I1]], align 4
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+  %1 = extractelement <1 x <2 x i32>*> %src, i32 0
+  %2 = load <2 x i32>, <2 x i32>* %1, align 4
+  ret <2 x i32> %2
+}
+
+define void @f3(<1 x <2 x i32>*> %src, i32 %index, <2 x i32> %val) {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT:    [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1
+; CHECK-NEXT:    [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <2 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <2 x i32>* [[SRC_I0]], <2 x i32>* undef
+; CHECK-NEXT:    [[DOTUPTO0_I0:%.*]] = bitcast <2 x i32>* [[DOTUPTO0]] to i32*
+; CHECK-NEXT:    [[DOTUPTO0_I1:%.*]] = getelementptr i32, i32* [[DOTUPTO0_I0]], i32 1
+; CHECK-NEXT:    store i32 [[VAL_I0]], i32* [[DOTUPTO0_I0]], align 4
+; CHECK-NEXT:    store i32 [[VAL_I1]], i32* [[DOTUPTO0_I1]], align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = extractelement <1 x <2 x i32>*> %src, i32 %index
+  store <2 x i32> %val, <2 x i32>* %1, align 4
+  ret void
+}
+
+define void @f3b(<1 x <2 x i32>*> %src, <2 x i32> %val) {
+; CHECK-LABEL: @f3b(
+; CHECK-NEXT:    [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <2 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[SRC_I0_I0:%.*]] = bitcast <2 x i32>* [[SRC_I0]] to i32*
+; CHECK-NEXT:    [[SRC_I0_I1:%.*]] = getelementptr i32, i32* [[SRC_I0_I0]], i32 1
+; CHECK-NEXT:    store i32 [[VAL_I0]], i32* [[SRC_I0_I0]], align 4
+; CHECK-NEXT:    store i32 [[VAL_I1]], i32* [[SRC_I0_I1]], align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = extractelement <1 x <2 x i32>*> %src, i32 0
+  store <2 x i32> %val, <2 x i32>* %1, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list