[llvm] 34ff90a - [Reassociate] Don't convert add-like-or's into add's if they appear to be part of load-combining idiom
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 18 06:55:37 PST 2020
Author: Roman Lebedev
Date: 2020-11-18T17:55:02+03:00
New Revision: 34ff90ad5d7d17a464bff9abbad70229c693a551
URL: https://github.com/llvm/llvm-project/commit/34ff90ad5d7d17a464bff9abbad70229c693a551
DIFF: https://github.com/llvm/llvm-project/commit/34ff90ad5d7d17a464bff9abbad70229c693a551.diff
LOG: [Reassociate] Don't convert add-like-or's into add's if they appear to be part of load-combining idiom
As Wei Mi is reporting in post-commit review
https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20201116/853479.html
teaching -reassociate about add-like-or's (70472f3) results in breaking apart
load widening patterns, and reassociating them.
For now, simply exclude any such `or` that appears to be a root of
load widening idiom from the or->add transformation.
Note that the heuristic is greedy, it doesn't ensure that loads
can *actually* be widened into a single load.
Added:
Modified:
llvm/lib/Transforms/Scalar/Reassociate.cpp
llvm/test/Transforms/Reassociate/load-combine-like-or.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 73fdd6ea1fd4..252677906854 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -920,8 +920,66 @@ static Value *NegateValue(Value *V, Instruction *BI,
return NewNeg;
}
+// See if this `or` looks like an load widening reduction, i.e. that it
+// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
+// ensure that the pattern is *really* a load widening reduction,
+// we do not ensure that it can really be replaced with a widened load,
+// only that it mostly looks like one.
+static bool isLoadCombineCandidate(Instruction *Or) {
+ SmallVector<Instruction *, 8> Worklist;
+ SmallSet<Instruction *, 8> Visited;
+
+ auto Enqueue = [&](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ // Each node of an `or` reduction must be an instruction,
+ if (!I)
+ return false; // Node is certainly not part of an `or` load reduction.
+ // Only process instructions we have never processed before.
+ if (Visited.insert(I).second)
+ Worklist.emplace_back(I);
+ return true; // Will need to look at parent nodes.
+ };
+
+ if (!Enqueue(Or))
+ return false; // Not an `or` reduction pattern.
+
+ while (!Worklist.empty()) {
+ auto *I = Worklist.pop_back_val();
+
+ // Okay, which instruction is this node?
+ switch (I->getOpcode()) {
+ case Instruction::Or:
+ // Got an `or` node. That's fine, just recurse into it's operands.
+ for (Value *Op : I->operands())
+ if (!Enqueue(Op))
+ return false; // Not an `or` reduction pattern.
+ continue;
+
+ case Instruction::Shl:
+ case Instruction::ZExt:
+ // `shl`/`zext` nodes are fine, just recurse into their base operand.
+ if (!Enqueue(I->getOperand(0)))
+ return false; // Not an `or` reduction pattern.
+ continue;
+
+ case Instruction::Load:
+ // Perfect, `load` node means we've reached an edge of the graph.
+ continue;
+
+ default: // Unknown node.
+ return false; // Not an `or` reduction pattern.
+ }
+ }
+
+ return true;
+}
+
/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
+ // If this `or` appears to be a part of an load widening reduction, ignore it.
+ if (isLoadCombineCandidate(Or))
+ return false;
+
// Don't bother to convert this up unless either the LHS is an associable add
// or subtract or mul or if this is only used by one of the above.
// This is only a compile-time improvement, it is not needed for correctness!
diff --git a/llvm/test/Transforms/Reassociate/load-combine-like-or.ll b/llvm/test/Transforms/Reassociate/load-combine-like-or.ll
index 7058acbcb760..06ac9347f3d8 100644
--- a/llvm/test/Transforms/Reassociate/load-combine-like-or.ll
+++ b/llvm/test/Transforms/Reassociate/load-combine-like-or.ll
@@ -10,8 +10,8 @@ define i16 @p0_i8_i8_i16(i8* %ptr) {
; CHECK-NEXT: [[I4:%.*]] = shl i16 [[I3]], 8
; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[PTR]], align 1
; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I4]]
+; CHECK-NEXT: [[I7:%.*]] = or i16 [[I4]], [[I6]]
+; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
; CHECK-NEXT: ret i16 [[I8]]
;
%i = getelementptr inbounds i8, i8* %ptr, i64 1
@@ -34,8 +34,8 @@ define i16 @p1_i8_i8_i16_swapped(i8* %ptr) {
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 1
; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[I4]], align 1
; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I3]]
+; CHECK-NEXT: [[I7:%.*]] = or i16 [[I3]], [[I6]]
+; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
; CHECK-NEXT: ret i16 [[I8]]
;
%i = load i8, i8* %ptr
@@ -58,8 +58,8 @@ define i16 @p2(i8* %ptr) {
; CHECK-NEXT: [[I4:%.*]] = shl i16 [[I3]], 9
; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[PTR]], align 1
; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I4]]
+; CHECK-NEXT: [[I7:%.*]] = or i16 [[I4]], [[I6]]
+; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
; CHECK-NEXT: ret i16 [[I8]]
;
%i = getelementptr inbounds i8, i8* %ptr, i64 1
@@ -79,8 +79,8 @@ define i16 @p3(i8* %ptr) {
; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
; CHECK-NEXT: [[I2:%.*]] = zext i8 [[I]] to i16
; CHECK-NEXT: [[I3:%.*]] = shl i16 [[I2]], 8
-; CHECK-NEXT: [[I4:%.*]] = add i16 [[I2]], 42
-; CHECK-NEXT: [[I5:%.*]] = add i16 [[I4]], [[I3]]
+; CHECK-NEXT: [[I4:%.*]] = or i16 [[I3]], [[I2]]
+; CHECK-NEXT: [[I5:%.*]] = add i16 [[I4]], 42
; CHECK-NEXT: ret i16 [[I5]]
;
%i = load i8, i8* %ptr
More information about the llvm-commits
mailing list