[llvm] 34ff90a - [Reassociate] Don't convert add-like-or's into add's if they appear to be part of load-combining idiom

Wed Nov 18 06:55:37 PST 2020

Author: Roman Lebedev
Date: 2020-11-18T17:55:02+03:00
New Revision: 34ff90ad5d7d17a464bff9abbad70229c693a551

URL: https://github.com/llvm/llvm-project/commit/34ff90ad5d7d17a464bff9abbad70229c693a551
DIFF: https://github.com/llvm/llvm-project/commit/34ff90ad5d7d17a464bff9abbad70229c693a551.diff

LOG: [Reassociate] Don't convert add-like-or's into add's if they appear to be part of load-combining idiom

As Wei Mi is reporting in post-commit review
  https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20201116/853479.html
teaching -reassociate about add-like-or's (70472f3) results in breaking apart
load widening patterns, and reassociating them.

For now, simply exclude any such `or` that appears to be a root of
load widening idiom from the or->add transformation.

Note that the heuristic is greedy, it doesn't ensure that loads
can *actually* be widened into a single load.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/Reassociate.cpp
    llvm/test/Transforms/Reassociate/load-combine-like-or.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 73fdd6ea1fd4..252677906854 100644

--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -920,8 +920,66 @@ static Value *NegateValue(Value *V, Instruction *BI,
   return NewNeg;
 }
 
+// See if this `or` looks like an load widening reduction, i.e. that it
+// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
+// ensure that the pattern is *really* a load widening reduction,
+// we do not ensure that it can really be replaced with a widened load,
+// only that it mostly looks like one.
+static bool isLoadCombineCandidate(Instruction *Or) {
+  SmallVector<Instruction *, 8> Worklist;
+  SmallSet<Instruction *, 8> Visited;
+
+  auto Enqueue = [&](Value *V) {
+    auto *I = dyn_cast<Instruction>(V);
+    // Each node of an `or` reduction must be an instruction,
+    if (!I)
+      return false; // Node is certainly not part of an `or` load reduction.
+    // Only process instructions we have never processed before.
+    if (Visited.insert(I).second)
+      Worklist.emplace_back(I);
+    return true; // Will need to look at parent nodes.
+  };
+
+  if (!Enqueue(Or))
+    return false; // Not an `or` reduction pattern.
+
+  while (!Worklist.empty()) {
+    auto *I = Worklist.pop_back_val();
+
+    // Okay, which instruction is this node?
+    switch (I->getOpcode()) {
+    case Instruction::Or:
+      // Got an `or` node. That's fine, just recurse into it's operands.
+      for (Value *Op : I->operands())
+        if (!Enqueue(Op))
+          return false; // Not an `or` reduction pattern.
+      continue;
+
+    case Instruction::Shl:
+    case Instruction::ZExt:
+      // `shl`/`zext` nodes are fine, just recurse into their base operand.
+      if (!Enqueue(I->getOperand(0)))
+        return false; // Not an `or` reduction pattern.
+      continue;
+
+    case Instruction::Load:
+      // Perfect, `load` node means we've reached an edge of the graph.
+      continue;
+
+    default:        // Unknown node.
+      return false; // Not an `or` reduction pattern.
+    }
+  }
+
+  return true;
+}
+
 /// Return true if it may be profitable to convert this (X|Y) into (X+Y).
 static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
+  // If this `or` appears to be a part of an load widening reduction, ignore it.
+  if (isLoadCombineCandidate(Or))
+    return false;
+
   // Don't bother to convert this up unless either the LHS is an associable add
   // or subtract or mul or if this is only used by one of the above.
   // This is only a compile-time improvement, it is not needed for correctness!

diff  --git a/llvm/test/Transforms/Reassociate/load-combine-like-or.ll b/llvm/test/Transforms/Reassociate/load-combine-like-or.ll
index 7058acbcb760..06ac9347f3d8 100644
--- a/llvm/test/Transforms/Reassociate/load-combine-like-or.ll
+++ b/llvm/test/Transforms/Reassociate/load-combine-like-or.ll
@@ -10,8 +10,8 @@ define i16 @p0_i8_i8_i16(i8* %ptr) {
 ; CHECK-NEXT:    [[I4:%.*]] = shl i16 [[I3]], 8
 ; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[PTR]], align 1
 ; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT:    [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], [[I4]]
+; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I4]], [[I6]]
+; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT:    ret i16 [[I8]]
 ;
   %i = getelementptr inbounds i8, i8* %ptr, i64 1
@@ -34,8 +34,8 @@ define i16 @p1_i8_i8_i16_swapped(i8* %ptr) {
 ; CHECK-NEXT:    [[I4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 1
 ; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[I4]], align 1
 ; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT:    [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], [[I3]]
+; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I3]], [[I6]]
+; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT:    ret i16 [[I8]]
 ;
   %i = load i8, i8* %ptr
@@ -58,8 +58,8 @@ define i16 @p2(i8* %ptr) {
 ; CHECK-NEXT:    [[I4:%.*]] = shl i16 [[I3]], 9
 ; CHECK-NEXT:    [[I5:%.*]] = load i8, i8* [[PTR]], align 1
 ; CHECK-NEXT:    [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT:    [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], [[I4]]
+; CHECK-NEXT:    [[I7:%.*]] = or i16 [[I4]], [[I6]]
+; CHECK-NEXT:    [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT:    ret i16 [[I8]]
 ;
   %i = getelementptr inbounds i8, i8* %ptr, i64 1
@@ -79,8 +79,8 @@ define i16 @p3(i8* %ptr) {
 ; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
 ; CHECK-NEXT:    [[I2:%.*]] = zext i8 [[I]] to i16
 ; CHECK-NEXT:    [[I3:%.*]] = shl i16 [[I2]], 8
-; CHECK-NEXT:    [[I4:%.*]] = add i16 [[I2]], 42
-; CHECK-NEXT:    [[I5:%.*]] = add i16 [[I4]], [[I3]]
+; CHECK-NEXT:    [[I4:%.*]] = or i16 [[I3]], [[I2]]
+; CHECK-NEXT:    [[I5:%.*]] = add i16 [[I4]], 42
 ; CHECK-NEXT:    ret i16 [[I5]]
 ;
   %i = load i8, i8* %ptr