[llvm] r341288 - [Reassociate] swap binop operands to increase factoring potential

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 2 07:22:54 PDT 2018


Author: spatel
Date: Sun Sep  2 07:22:54 2018
New Revision: 341288

URL: http://llvm.org/viewvc/llvm-project?rev=341288&view=rev
Log:
[Reassociate] swap binop operands to increase factoring potential

If we have a pair of binops feeding another pair of binops, rearrange the operands so
that the matching pair ends up together, because that enables easy factorization folds
in instcombine:
((X << S) & Y) & (Z << S) --> ((X << S) & (Z << S)) & Y (reassociation)

--> ((X & Z) << S) & Y (factorize shift from 'and' ops optimization)
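
As a concrete illustration in IR (a hand-written sketch, not one of the tests in this
patch; the value names are invented), reassociate turns:

  %xs = shl i8 %x, %s
  %zs = shl i8 %z, %s
  %a  = and i8 %xs, %y
  %r  = and i8 %a, %zs

into:

  %a2 = and i8 %xs, %zs
  %r  = and i8 %a2, %y

and a later instcombine run can then factorize the common shift:

  %xz = and i8 %x, %z
  %s1 = shl i8 %xz, %s
  %r  = and i8 %s1, %y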

This is part of solving PR37098:
https://bugs.llvm.org/show_bug.cgi?id=37098

Note that there's an instcombine version of this patch attached there, but we're trying
to give instcombine less responsibility in order to improve compile-time efficiency.

For reasons I still don't completely understand, reassociate sometimes does this kind of
transform already, but it misses everything in my motivating cases.

This patch on its own glues an independent cleanup step onto the end of the existing
RewriteExprTree() loop. We can build on it and do something stronger to better order the
full expression tree, as in D40049. That might be an alternative to the proposal to add
a separate reassociation pass (D41574).
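
To see both steps together, running the legacy pass pipeline by hand should work (a
usage sketch; input.ll is any file containing the pattern above):

  opt -reassociate -instcombine -S < input.ll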

Differential Revision: https://reviews.llvm.org/D45842
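
Note the one-use restriction in the new code: if the inner binop that matches the root
opcode has other uses, the transform bails out, because reassociating would then add an
instruction rather than enable a fold. A hand-written sketch of a case that is
intentionally left alone (@use stands for some declared helper):

  %xs = lshr i8 %x, %s
  %zs = lshr i8 %z, %s
  %a  = add i8 %xs, %y    ; %a has a second use below, so no transform
  %r  = add i8 %a, %zs
  call void @use(i8 %a)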

Modified:
    llvm/trunk/include/llvm/Transforms/Scalar/Reassociate.h
    llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp
    llvm/trunk/test/Transforms/Reassociate/matching-binops.ll

Modified: llvm/trunk/include/llvm/Transforms/Scalar/Reassociate.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/Reassociate.h?rev=341288&r1=341287&r2=341288&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar/Reassociate.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar/Reassociate.h Sun Sep  2 07:22:54 2018
@@ -118,6 +118,7 @@ private:
   void OptimizeInst(Instruction *I);
   Instruction *canonicalizeNegConstExpr(Instruction *I);
   void BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT);
+  void swapOperandsToMatchBinops(BinaryOperator &B);
 };
 
 } // end namespace llvm

Modified: llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp?rev=341288&r1=341287&r2=341288&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp Sun Sep  2 07:22:54 2018
@@ -63,6 +63,7 @@
 
 using namespace llvm;
 using namespace reassociate;
+using namespace PatternMatch;
 
 #define DEBUG_TYPE "reassociate"
 
@@ -2131,6 +2132,64 @@ void ReassociatePass::OptimizeInst(Instr
   ReassociateExpression(BO);
 }
 
+/// If we have an associative pair of binops with the same opcode and 2 of the 3
+/// operands to that pair of binops are some other matching binop, rearrange the
+/// operands of the associative binops so the matching ops are paired together.
+/// This transform creates factoring opportunities by pairing opcodes.
+/// TODO: Should those factoring optimizations be handled here or in instcombine?
+/// Example:
+///   ((X << S) & Y) & (Z << S) --> ((X << S) & (Z << S)) & Y (reassociation)
+///     --> ((X & Z) << S) & Y (factorize shift from 'and' ops optimization)
+void ReassociatePass::swapOperandsToMatchBinops(BinaryOperator &B) {
+  BinaryOperator *B0, *B1;
+  if (!B.isAssociative() || !B.isCommutative() ||
+      !match(&B, m_BinOp(m_BinOp(B0), m_BinOp(B1))))
+    return;
+
+  // We have (B0 op B1) where both operands are also binops.
+  // Canonicalize a binop with the same opcode as the parent binop (B) to B0 and
+  // a binop with a different opcode to B1.
+  Instruction::BinaryOps TopOpc = B.getOpcode();
+  if (B0->getOpcode() != TopOpc)
+    std::swap(B0, B1);
+
+  // If (1) we don't have a pair of binops with the same opcode or (2) B0 and B1
+  // already have the same opcode, there is nothing to do. If the binop with the
+  // same opcode (B0) has more than one use, reassociation would result in more
+  // instructions, so bail out.
+  Instruction::BinaryOps OtherOpc = B1->getOpcode();
+  if (B0->getOpcode() != TopOpc || !B0->hasOneUse() || OtherOpc == TopOpc)
+    return;
+
+  // Canonicalize a binop that matches B1 to V00 (operand 0 of B0) and a value
+  // that does not match B1 to V01.
+  Value *V00 = B0->getOperand(0), *V01 = B0->getOperand(1);
+  if (!match(V00, m_BinOp()) ||
+      cast<BinaryOperator>(V00)->getOpcode() != OtherOpc)
+    std::swap(V00, V01);
+
+  // We need a binop matching B1's opcode (OtherOpc) in V00, and a value that does
+  // not match that opcode in V01.
+  BinaryOperator *B00, *B01;
+  if (!match(V00, m_BinOp(B00)) || B00->getOpcode() != OtherOpc ||
+      (match(V01, m_BinOp(B01)) && B01->getOpcode() == OtherOpc))
+    return;
+
+  // B00 and B1 are displaced matching binops, so pull them together:
+  // (B00 & V01) & B1  --> (B00 & B1) & V01
+  IRBuilder<> Builder(&B);
+  Value *NewBO1 = Builder.CreateBinOp(TopOpc, B00, B1);
+  Value *NewBO2 = Builder.CreateBinOp(TopOpc, NewBO1, V01);
+
+  // Fast-math-flags propagate from B; wrapping flags are cleared.
+  if (auto *I1 = dyn_cast<Instruction>(NewBO1))
+    I1->copyIRFlags(&B, /*IncludeWrapFlags=*/false);
+  if (auto *I2 = dyn_cast<Instruction>(NewBO2))
+    I2->copyIRFlags(&B, /*IncludeWrapFlags=*/false);
+
+  B.replaceAllUsesWith(NewBO2);
+}
+
 void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
   // First, walk the expression tree, linearizing the tree, collecting the
   // operand information.
@@ -2250,6 +2311,9 @@ void ReassociatePass::ReassociateExpress
   // Now that we ordered and optimized the expressions, splat them back into
   // the expression tree, removing any unneeded nodes.
   RewriteExprTree(I, Ops);
+
+  // Try a final reassociation of the root of the tree.
+  swapOperandsToMatchBinops(*I);
 }
 
 void

Modified: llvm/trunk/test/Transforms/Reassociate/matching-binops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Reassociate/matching-binops.ll?rev=341288&r1=341287&r2=341288&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/Reassociate/matching-binops.ll (original)
+++ llvm/trunk/test/Transforms/Reassociate/matching-binops.ll Sun Sep  2 07:22:54 2018
@@ -16,8 +16,8 @@ define i8 @and_shl(i8 %x, i8 %y, i8 %z,
 ; CHECK-LABEL: @and_shl(
 ; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt
@@ -31,8 +31,8 @@ define i8 @or_shl(i8 %x, i8 %y, i8 %z, i
 ; CHECK-LABEL: @or_shl(
 ; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt
@@ -46,8 +46,8 @@ define i8 @xor_shl(i8 %x, i8 %y, i8 %z,
 ; CHECK-LABEL: @xor_shl(
 ; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt
@@ -61,8 +61,8 @@ define i8 @and_lshr(i8 %x, i8 %y, i8 %z,
 ; CHECK-LABEL: @and_lshr(
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -76,8 +76,8 @@ define i8 @or_lshr(i8 %x, i8 %y, i8 %z,
 ; CHECK-LABEL: @or_lshr(
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -91,8 +91,8 @@ define i8 @xor_lshr(i8 %x, i8 %y, i8 %z,
 ; CHECK-LABEL: @xor_lshr(
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -106,8 +106,8 @@ define i8 @and_ashr(i8 %x, i8 %y, i8 %z,
 ; CHECK-LABEL: @and_ashr(
 ; CHECK-NEXT:    [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = ashr i8 %x, %shamt
@@ -121,8 +121,8 @@ define i8 @or_ashr(i8 %x, i8 %y, i8 %z,
 ; CHECK-LABEL: @or_ashr(
 ; CHECK-NEXT:    [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = ashr i8 %x, %shamt
@@ -138,8 +138,8 @@ define <2 x i8> @xor_ashr(<2 x i8> %x, <
 ; CHECK-LABEL: @xor_ashr(
 ; CHECK-NEXT:    [[SX:%.*]] = ashr <2 x i8> [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = ashr <2 x i8> [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = xor <2 x i8> [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = xor <2 x i8> [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %sx = ashr <2 x i8> %x, %shamt
@@ -203,19 +203,29 @@ define i8 @xor_lshr_multiuse(i8 %x, i8 %
 }
 
 ; Math ops work too. Change instruction positions too to verify placement.
+; We only care about extra uses of the first associative value - in this
+; case, it's %a. Everything else can have extra uses.
+
+declare void @use(i8)
 
 define i8 @add_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: @add_lshr(
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add i8 [[SX]], [[Z:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[R:%.*]] = add i8 [[A]], [[SY]]
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[SX]])
+; CHECK-NEXT:    call void @use(i8 [[SY]])
+; CHECK-NEXT:    call void @use(i8 [[TMP2]])
+; CHECK-NEXT:    ret i8 [[TMP2]]
 ;
   %sx = lshr i8 %x, %shamt
   %a = add i8 %sx, %z
   %sy = lshr i8 %y, %shamt
   %r = add i8 %a, %sy
+  call void @use(i8 %sx)
+  call void @use(i8 %sy)
+  call void @use(i8 %r)
   ret i8 %r
 }
 
@@ -225,8 +235,8 @@ define i8 @mul_sub(i8 %x, i8 %y, i8 %z,
 ; CHECK-LABEL: @mul_sub(
 ; CHECK-NEXT:    [[SX:%.*]] = sub i8 [[X:%.*]], [[M:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = sub i8 [[Y:%.*]], [[M]]
-; CHECK-NEXT:    [[A:%.*]] = mul nsw i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = mul nuw i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = mul i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = mul i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = sub i8 %x, %m
@@ -239,9 +249,9 @@ define i8 @mul_sub(i8 %x, i8 %y, i8 %z,
 define i8 @add_mul(i8 %x, i8 %y, i8 %z, i8 %m) {
 ; CHECK-LABEL: @add_mul(
 ; CHECK-NEXT:    [[SX:%.*]] = mul nuw i8 [[X:%.*]], 42
-; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[Z:%.*]], [[SX]]
 ; CHECK-NEXT:    [[SY:%.*]] = mul nsw i8 [[M:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = add nsw i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = add i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = mul nuw i8 %x, 42
@@ -257,9 +267,9 @@ define i8 @add_mul(i8 %x, i8 %y, i8 %z,
 define float @fadd_fmul(float %x, float %y, float %z, float %m) {
 ; CHECK-LABEL: @fadd_fmul(
 ; CHECK-NEXT:    [[SX:%.*]] = fmul float [[X:%.*]], [[M:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = fadd fast float [[SX]], [[Z:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = fmul float [[Y:%.*]], [[M]]
-; CHECK-NEXT:    [[R:%.*]] = fadd fast float [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = fadd fast float [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = fadd fast float [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %sx = fmul float %x, %m
@@ -273,8 +283,8 @@ define float @fmul_fdiv(float %x, float
 ; CHECK-LABEL: @fmul_fdiv(
 ; CHECK-NEXT:    [[SX:%.*]] = fdiv float [[X:%.*]], [[M:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = fdiv float [[Y:%.*]], 4.200000e+01
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[SY]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = fmul fast float [[A]], [[SX]]
+; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = fmul fast float [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %sx = fdiv float %x, %m
@@ -284,7 +294,7 @@ define float @fmul_fdiv(float %x, float
   ret float %r
 }
 
-; Verify that debug info for modified instructions gets discarded (references become undef).
+; Verify that debug info for modified instructions remains valid (references to dropped values become undef).
 
 define i32 @and_shl_dbg(i32 %x, i32 %y, i32 %z, i32 %shamt) {
 ; CHECK-LABEL: @and_shl_dbg(
@@ -296,11 +306,11 @@ define i32 @and_shl_dbg(i32 %x, i32 %y,
 ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[SHL]], metadata !16, metadata !DIExpression()), !dbg !25
 ; CHECK-NEXT:    [[SHL1:%.*]] = shl i32 [[Y]], [[SHAMT]], !dbg !26
 ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[SHL1]], metadata !17, metadata !DIExpression()), !dbg !27
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], [[Z]], !dbg !28
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[AND]], metadata !18, metadata !DIExpression()), !dbg !29
-; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND]], [[SHL1]], !dbg !30
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[AND2]], metadata !19, metadata !DIExpression()), !dbg !31
-; CHECK-NEXT:    ret i32 [[AND2]], !dbg !32
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata !2, metadata !18, metadata !DIExpression()), !dbg !28
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[SHL]], [[SHL1]], !dbg !29
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[Z]], !dbg !29
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[TMP2]], metadata !19, metadata !DIExpression()), !dbg !30
+; CHECK-NEXT:    ret i32 [[TMP2]], !dbg !31
 ;
   call void @llvm.dbg.value(metadata i32 %x, metadata !13, metadata !DIExpression()), !dbg !21
   call void @llvm.dbg.value(metadata i32 %y, metadata !14, metadata !DIExpression()), !dbg !22



