[llvm] ffb2af3 - [SCEVExpander] Attempt to reinfer flags dropped due to CSE (#72431)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 7 13:20:40 PST 2023


Author: Philip Reames
Date: 2023-12-07T13:20:36-08:00
New Revision: ffb2af3ed6a95a4eb55b81a3d1351d5d4bd66eb5

URL: https://github.com/llvm/llvm-project/commit/ffb2af3ed6a95a4eb55b81a3d1351d5d4bd66eb5
DIFF: https://github.com/llvm/llvm-project/commit/ffb2af3ed6a95a4eb55b81a3d1351d5d4bd66eb5.diff

LOG: [SCEVExpander] Attempt to reinfer flags dropped due to CSE (#72431)

LSR uses SCEVExpander to generate induction formulas. The expander
internally tries to reuse existing IR expressions. To do that, it needs
to strip any poison-generating flags (nsw, nuw, exact, nneg, etc.)
which may not be valid for the newly added users.

This is conservatively correct, but has the effect that LSR will strip
nneg flags on zext instructions involved in trip counts in loop
preheaders. To avoid this, this patch adjusts the expander to reinfer
the flags on the CSE candidate if legal for all possible users.

This should fix the regression reported in
https://github.com/llvm/llvm-project/issues/71200.

This should arguably be done inside canReuseInstruction instead, but
doing it outside is more conservative compile-time-wise. Both
canReuseInstruction and isGuaranteedNotToBePoison walk operand lists, so
right now we are performing work which is roughly O(N^2) in the size of
the operand graph. We should fix that before making the per operand step
more expensive. My tentative plan is to land this, and then rework the
code to sink the logic into more core interfaces.

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
    llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
    llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
    llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll
    llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
    llvm/test/Transforms/LoopPredication/basic.ll
    llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 8bfe9e67d15e2f..cd3ac317cd238e 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1532,8 +1532,26 @@ Value *SCEVExpander::expand(const SCEV *S) {
     V = visit(S);
     V = fixupLCSSAFormFor(V);
   } else {
-    for (Instruction *I : DropPoisonGeneratingInsts)
+    for (Instruction *I : DropPoisonGeneratingInsts) {
       I->dropPoisonGeneratingFlagsAndMetadata();
+      // See if we can re-infer from first principles any of the flags we just
+      // dropped.
+      if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
+        if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
+          auto *BO = cast<BinaryOperator>(I);
+          BO->setHasNoUnsignedWrap(
+            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
+          BO->setHasNoSignedWrap(
+            ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
+        }
+      if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
+        auto *Src = NNI->getOperand(0);
+        if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
+                                    Constant::getNullValue(Src->getType()), I,
+                                    DL).value_or(false))
+          NNI->setNonNeg(true);
+      }
+    }
   }
   // Remember the expanded value for this SCEV at this location.
   //

diff  --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
index e73249739f356b..31ca8eab33508c 100644
--- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -10,8 +10,7 @@ define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    blez a1, .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    slli a1, a1, 32
-; CHECK-NEXT:    srli a1, a1, 30
+; CHECK-NEXT:    slli a1, a1, 2
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:  .LBB0_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1

diff  --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
index cc7b5a7f3f819f..7087041e8dace6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
@@ -385,19 +385,17 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
 ; CHECK-NEXT:    @ implicit-def: $r11
 ; CHECK-NEXT:    mov.w r9, #12
 ; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    add.w r0, r0, r2, lsr #1
 ; CHECK-NEXT:    add.w r1, r1, r2, lsr #1
-; CHECK-NEXT:    movw r2, #65532
-; CHECK-NEXT:    vdup.32 q6, r0
-; CHECK-NEXT:    movt r2, #32767
-; CHECK-NEXT:    and.w r3, r1, r2
+; CHECK-NEXT:    add.w r0, r0, r2, lsr #1
+; CHECK-NEXT:    bic r3, r1, #3
 ; CHECK-NEXT:    adr r1, .LCPI1_0
-; CHECK-NEXT:    vdup.32 q7, r0
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
 ; CHECK-NEXT:    adr r1, .LCPI1_1
 ; CHECK-NEXT:    vldrw.u32 q5, [r1]
-; CHECK-NEXT:    strd r3, r7, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT:    vdup.32 q6, r0
 ; CHECK-NEXT:    vadd.i32 q4, q0, r7
+; CHECK-NEXT:    vdup.32 q7, r0
+; CHECK-NEXT:    strd r3, r7, [sp, #4] @ 8-byte Folded Spill
 ; CHECK-NEXT:    b .LBB1_6
 ; CHECK-NEXT:  .LBB1_2: @ %for.body6.preheader
 ; CHECK-NEXT:    @ in Loop: Header=BB1_6 Depth=1

diff  --git a/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll b/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll
index c28fc59014f5c6..58dff360ff6a5c 100644
--- a/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll
+++ b/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll
@@ -9,7 +9,7 @@ define i32 @test_01(i32 %A, i64 %Len, ptr %array) {
 ; CHECK-NEXT:    br i1 [[TRIPCHECK]], label [[LOOP_PREHEADER:%.*]], label [[ZERO:%.*]]
 ; CHECK:       loop.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:    [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[LEN]], i64 0)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[LEN]], [[SMIN]]
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP1]])

diff  --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index 60e014b0efca53..3f0ada281b1e34 100644
--- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -182,7 +182,7 @@ define void @promote_latch_condition_decrementing_loop_01(ptr %p, ptr %a) {
 ; CHECK-LABEL: @promote_latch_condition_decrementing_loop_01(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LEN:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[LEN_MINUS_1:%.*]] = add i32 [[LEN]], -1
+; CHECK-NEXT:    [[LEN_MINUS_1:%.*]] = add nsw i32 [[LEN]], -1
 ; CHECK-NEXT:    [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0
 ; CHECK-NEXT:    br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]]
 ; CHECK:       preheader:

diff  --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll
index 2a99963ad0786c..27c8bc99c407e0 100644
--- a/llvm/test/Transforms/LoopPredication/basic.ll
+++ b/llvm/test/Transforms/LoopPredication/basic.ll
@@ -1681,7 +1681,7 @@ define i32 @ne_latch_zext(ptr %array, i32 %length, i16 %n16) {
 ; CHECK-LABEL: @ne_latch_zext(
 ; CHECK-NEXT:  loop.preheader:
 ; CHECK-NEXT:    [[N:%.*]] = zext i16 [[N16:%.*]] to i32
-; CHECK-NEXT:    [[NPLUS1:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT:    [[NPLUS1:%.*]] = add nuw nsw i32 [[N]], 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i32 [[NPLUS1]], [[LENGTH:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
index cf875ccdc147ab..669306c8f3ab74 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
@@ -96,7 +96,7 @@ define void @pr56282() {
 ; CHECK:       inner.2.preheader:
 ; CHECK-NEXT:    br label [[INNER_2]]
 ; CHECK:       inner.2:
-; CHECK-NEXT:    [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1
+; CHECK-NEXT:    [[OUTER_IV_NEXT]] = add nuw i64 [[OUTER_IV]], 1
 ; CHECK-NEXT:    br label [[OUTER_HEADER]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void


        


More information about the llvm-commits mailing list