[PATCH] D79799: [VectorCombine] add loop to enable iterative folding

Sanjay Patel via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue May 12 12:55:00 PDT 2020


spatel created this revision.
spatel added reviewers: nikic, lebedev.ri, efriedma, RKSimon.
Herald added subscribers: hiraditya, mcrosier.
Herald added a project: LLVM.

Given the limited range of potential vector transforms, is this an acceptable way to iterate? If this is or becomes too inefficient, we could use a worklist strategy like instcombine, but that would require altering more code. The motivation comes from PR42174:
https://bugs.llvm.org/show_bug.cgi?id=42174
...although we don't have the underlying scalarization-with-constant-operand fold yet. If we add that transform first (without this loop), it would only scalarize one instruction instead of the entire chain of insert-insert-binops.


https://reviews.llvm.org/D79799

Files:
  llvm/lib/Transforms/Vectorize/VectorCombine.cpp
  llvm/test/Transforms/VectorCombine/X86/insert-binop.ll


Index: llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
+++ llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
@@ -51,11 +51,9 @@
 define <2 x i64> @ins1_ins1_iterate(i64 %w, i64 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: @ins1_ins1_iterate(
 ; CHECK-NEXT:    [[S0_SCALAR:%.*]] = sub i64 [[W:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[S0:%.*]] = insertelement <2 x i64> undef, i64 [[S0_SCALAR]], i64 1
-; CHECK-NEXT:    [[I2:%.*]] = insertelement <2 x i64> undef, i64 [[Y:%.*]], i32 1
-; CHECK-NEXT:    [[S1:%.*]] = or <2 x i64> [[S0]], [[I2]]
-; CHECK-NEXT:    [[I3:%.*]] = insertelement <2 x i64> undef, i64 [[Z:%.*]], i32 1
-; CHECK-NEXT:    [[S2:%.*]] = shl <2 x i64> [[I3]], [[S1]]
+; CHECK-NEXT:    [[S1_SCALAR:%.*]] = or i64 [[S0_SCALAR]], [[Y:%.*]]
+; CHECK-NEXT:    [[S2_SCALAR:%.*]] = shl i64 [[Z:%.*]], [[S1_SCALAR]]
+; CHECK-NEXT:    [[S2:%.*]] = insertelement <2 x i64> undef, i64 [[S2_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[S2]]
 ;
   %i0 = insertelement <2 x i64> undef, i64 %w, i64 1
Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -376,28 +376,35 @@
     return false;
 
   bool MadeChange = false;
-  for (BasicBlock &BB : F) {
-    // Ignore unreachable basic blocks.
-    if (!DT.isReachableFromEntry(&BB))
-      continue;
-    // Do not delete instructions under here and invalidate the iterator.
-    // Walk the block backwards for efficiency. We're matching a chain of
-    // use->defs, so we're more likely to succeed by starting from the bottom.
-    // TODO: It could be more efficient to remove dead instructions
-    //       iteratively in this loop rather than waiting until the end.
-    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
-      if (isa<DbgInfoIntrinsic>(I))
+
+  // Iterate until there are no more changes. Transforms can build on each
+  // other's improvements.
+  bool IterationChange;
+  do {
+    IterationChange = false;
+    for (BasicBlock &BB : F) {
+      // Ignore unreachable basic blocks.
+      if (!DT.isReachableFromEntry(&BB))
         continue;
-      MadeChange |= foldExtractExtract(I, TTI);
-      MadeChange |= foldBitcastShuf(I, TTI);
-      MadeChange |= scalarizeBinop(I, TTI);
-    }
-  }
 
-  // We're done with transforms, so remove dead instructions.
-  if (MadeChange)
-    for (BasicBlock &BB : F)
-      SimplifyInstructionsInBlock(&BB);
+      // Walk the block backwards for efficiency. We are matching a chain of
+      // use->defs, so we're more likely to succeed by starting from the bottom.
+      for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
+        if (isa<DbgInfoIntrinsic>(I))
+          continue;
+        IterationChange |= foldExtractExtract(I, TTI);
+        IterationChange |= foldBitcastShuf(I, TTI);
+        IterationChange |= scalarizeBinop(I, TTI);
+      }
+    }
+    // Remove dead instructions before iterating.
+    if (IterationChange)
+      for (BasicBlock &BB : F)
+        SimplifyInstructionsInBlock(&BB);
+
+    // Set overall changed flag.
+    MadeChange |= IterationChange;
+  } while (IterationChange);
 
   return MadeChange;
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D79799.263468.patch
Type: text/x-patch
Size: 3395 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200512/2a0357f3/attachment.bin>


More information about the llvm-commits mailing list