[llvm] a0b175c - [SimplifyCFG] Treat `extract oneuse(op.with.overflow),1` pattern as a single instruction (#128021)

Fri Mar 14 06:19:01 PDT 2025

Author: Gábor Spaits
Date: 2025-03-14T14:18:57+01:00
New Revision: a0b175cb348bf86630276077ebda4adbd8ffd84e

URL: https://github.com/llvm/llvm-project/commit/a0b175cb348bf86630276077ebda4adbd8ffd84e
DIFF: https://github.com/llvm/llvm-project/commit/a0b175cb348bf86630276077ebda4adbd8ffd84e.diff

LOG: [SimplifyCFG] Treat `extract oneuse(op.with.overflow),1` pattern as a single instruction (#128021)

Closes #115683.

An overflow arithmetic instruction followed by an extractvalue is usually
generated when a division is being replaced, but the zero check may still
be present. In that case, hoist these two instructions out of the basic
block and let later optimizations take care of the unnecessary zero
check.

Added: 
    llvm/test/Transforms/SimplifyCFG/RISCV/umul-extract-pattern.ll

Modified: 
    llvm/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 833dc7331aecd..09bf2c7daf06a 100644

--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -422,11 +422,11 @@ static InstructionCost computeSpeculationCost(const User *I,
 /// After this function returns, Cost is increased by the cost of
 /// V plus its non-dominating operands.  If that cost is greater than
 /// Budget, false is returned and Cost is undefined.
-static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
-                                SmallPtrSetImpl<Instruction *> &AggressiveInsts,
-                                InstructionCost &Cost, InstructionCost Budget,
-                                const TargetTransformInfo &TTI,
-                                AssumptionCache *AC, unsigned Depth = 0) {
+static bool dominatesMergePoint(
+    Value *V, BasicBlock *BB, Instruction *InsertPt,
+    SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
+    InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
+    SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
   // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
   // so limit the recursion depth.
   // TODO: While this recursion limit does prevent pathological behavior, it
@@ -464,7 +464,17 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
   if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
     return false;
 
-  Cost += computeSpeculationCost(I, TTI);
+  // An overflow arithmetic instruction followed by an extractvalue is usually
+  // generated when a division is being replaced, but the zero check may still
+  // be kept in the code. In that case it is worth hoisting these two
+  // instructions out of the basic block, so treat this pattern as one single
+  // cheap instruction here.
+  WithOverflowInst *OverflowInst;
+  if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
+    ZeroCostInstructions.insert(OverflowInst);
+    Cost += 1;
+  } else if (!ZeroCostInstructions.contains(I))
+    Cost += computeSpeculationCost(I, TTI);
 
   // Allow exactly one instruction to be speculated regardless of its cost
   // (as long as it is safe to do so).
@@ -481,7 +491,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
   // not take us over the cost threshold.
   for (Use &Op : I->operands())
     if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
-                             TTI, AC, Depth + 1))
+                             TTI, AC, ZeroCostInstructions, Depth + 1))
       return false;
   // Okay, it's safe to do this!  Remember this instruction.
   AggressiveInsts.insert(I);
@@ -3725,6 +3735,7 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
   // instructions.  While we are at it, keep track of the instructions
   // that need to be moved to the dominating block.
   SmallPtrSet<Instruction *, 4> AggressiveInsts;
+  SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
   InstructionCost Cost = 0;
   InstructionCost Budget =
       TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
@@ -3742,9 +3753,11 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
     }
 
     if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
-                             AggressiveInsts, Cost, Budget, TTI, AC) ||
+                             AggressiveInsts, Cost, Budget, TTI, AC,
+                             ZeroCostInstructions) ||
         !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
-                             AggressiveInsts, Cost, Budget, TTI, AC))
+                             AggressiveInsts, Cost, Budget, TTI, AC,
+                             ZeroCostInstructions))
       return Changed;
   }
 

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
index 7bcb6ce17df0e..9858591dfc700 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
@@ -45,26 +45,17 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
 ; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:  bb:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGONLY:       bb2:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGONLY:       bb5:
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[MUL_OV]]
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    ret i1 [[T6]]
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb2:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb5:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[MUL_OV]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0
@@ -112,28 +103,19 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
 ; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:  bb:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGONLY:       bb2:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGONLY:       bb5:
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    ret i1 [[T6]]
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb2:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb5:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[PHI_BO]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0

diff  --git a/llvm/test/Transforms/SimplifyCFG/RISCV/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/umul-extract-pattern.ll
new file mode 100644
index 0000000000000..0d431e2293b69
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/umul-extract-pattern.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
+target triple = "riscv64-unknown-unknown-elf"
+
+define i16 @basicScenario(i64 %x, i64 %y) {
+; CHECK-LABEL: @basicScenario(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+entry:
+  %cmp.not = icmp eq i64 %y, 0
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %entry
+  %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+  %mul.ov = extractvalue { i64, i1 } %mul, 1
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+  %conv = zext i1 %result to i16
+  ret i16 %conv
+}
+
+define i16 @samePatternTwice(i64 %x, i64 %y) {
+; CHECK-LABEL: @samePatternTwice(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[MUL2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X]])
+; CHECK-NEXT:    [[MUL_OV2:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV2]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[TMP1]] to i16
+; CHECK-NEXT:    [[TORET:%.*]] = add nsw i16 [[CONV]], [[CONV2]]
+; CHECK-NEXT:    ret i16 [[TORET]]
+;
+entry:
+  %cmp.not = icmp eq i64 %y, 0
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %entry
+  %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+  %mul.ov = extractvalue { i64, i1 } %mul, 1
+  %mul2 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+  %mul.ov2 = extractvalue { i64, i1 } %mul2, 1
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %result1 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+  %result2 = phi i1 [ false, %entry ], [ %mul.ov2, %land.rhs ]
+  %conv1 = zext i1 %result1 to i16
+  %conv2 = zext i1 %result2 to i16
+  %toRet = add nsw i16 %conv1, %conv2
+  ret i16 %toRet
+}
+
+define i16 @stillHoistNotTooExpensive(i64 %x, i64 %y) {
+; CHECK-LABEL: @stillHoistNotTooExpensive(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD]], i64 [[X]])
+; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+entry:
+  %cmp.not = icmp eq i64 %y, 0
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                   ; preds = %entry
+  %add = add nsw i64 %y, %x
+  %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add, i64 %x)
+  %mul.ov = extractvalue { i64, i1 } %mul, 1
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+  %conv = zext i1 %result to i16
+  ret i16 %conv
+}
+
+define i16 @noHoistTooExpensive(i64 %x, i64 %y) {
+; CHECK-LABEL: @noHoistTooExpensive(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
+; CHECK:       land.rhs:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i64 [[Y]], [[ADD]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i64 [[ADD]], [[ADD2]]
+; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i64 [[ADD2]], [[ADD3]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i64 [[ADD3]], [[ADD4]]
+; CHECK-NEXT:    [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD5]], i64 [[X]])
+; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    br label [[LAND_END]]
+; CHECK:       land.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+entry:
+  %cmp.not = icmp eq i64 %y, 0
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                   ; preds = %entry
+  %add = add nsw i64 %y, %x
+  %add2 = add nsw i64 %y, %add
+  %add3 = add nsw i64 %add, %add2
+  %add4 = add nsw i64 %add2, %add3
+  %add5 = add nsw i64 %add3, %add4
+  %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add5, i64 %x)
+  %mul.ov = extractvalue { i64, i1 } %mul, 1
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+  %conv = zext i1 %result to i16
+  ret i16 %conv
+}