[llvm] [SimplifyCFG] Treat umul + extract pattern as cheap single instruction (#115683) (Approach 2) (PR #128021)
Gábor Spaits via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 03:48:03 PST 2025
https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/128021
>From 9b11238b239526b97a551250edbdabdd9ba94442 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Thu, 20 Feb 2025 16:48:13 +0100
Subject: [PATCH 1/5] [SimplifyCFG] Treat umul + extract pattern as cheap
single instruction
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 24 +++++---
.../X86/unsigned-multiply-overflow-check.ll | 34 +++---------
.../SimplifyCFG/umul-extract-pattern.ll | 55 +++++++++++++++++++
3 files changed, 78 insertions(+), 35 deletions(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 12dd49da279b9..157716dc7ead9 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -421,11 +421,11 @@ static InstructionCost computeSpeculationCost(const User *I,
/// After this function returns, Cost is increased by the cost of
/// V plus its non-dominating operands. If that cost is greater than
/// Budget, false is returned and Cost is undefined.
-static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
- SmallPtrSetImpl<Instruction *> &AggressiveInsts,
- InstructionCost &Cost, InstructionCost Budget,
- const TargetTransformInfo &TTI,
- AssumptionCache *AC, unsigned Depth = 0) {
+static bool dominatesMergePoint(
+ Value *V, BasicBlock *BB, Instruction *InsertPt,
+ SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
+ InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
+ SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
// It is possible to hit a zero-cost cycle (phi/gep instructions for example),
// so limit the recursion depth.
// TODO: While this recursion limit does prevent pathological behavior, it
@@ -463,7 +463,12 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
return false;
- Cost += computeSpeculationCost(I, TTI);
+ WithOverflowInst *OverflowInst;
+ if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
+ ZeroCostInstructions.insert(OverflowInst);
+ Cost += 1;
+ } else if (!ZeroCostInstructions.contains(I))
+ Cost += computeSpeculationCost(I, TTI);
// Allow exactly one instruction to be speculated regardless of its cost
// (as long as it is safe to do so).
@@ -480,7 +485,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
// not take us over the cost threshold.
for (Use &Op : I->operands())
if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
- TTI, AC, Depth + 1))
+ TTI, AC, ZeroCostInstructions, Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts.insert(I);
@@ -3810,6 +3815,7 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction *, 4> AggressiveInsts;
+ SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
InstructionCost Cost = 0;
InstructionCost Budget =
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
@@ -3827,9 +3833,9 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
- AggressiveInsts, Cost, Budget, TTI, AC) ||
+ AggressiveInsts, Cost, Budget, TTI, AC, ZeroCostInstructions) ||
!dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
- AggressiveInsts, Cost, Budget, TTI, AC))
+ AggressiveInsts, Cost, Budget, TTI, AC, ZeroCostInstructions))
return Changed;
}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
index 7bcb6ce17df0e..9858591dfc700 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
@@ -45,26 +45,17 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGONLY: bb2:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGONLY: bb5:
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[MUL_OV]]
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[MUL_OV]]
;
bb:
%t0 = icmp eq i64 %arg, 0
@@ -112,28 +103,19 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGONLY: bb2:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGONLY: bb5:
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[PHI_BO]]
;
bb:
%t0 = icmp eq i64 %arg, 0
diff --git a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
new file mode 100644
index 0000000000000..8a5e034998a25
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
+target triple = "riscv64-unknown-unknown-elf"
+
+define i16 @func2(i64 %x, i64 %y) {
+; CHECK-LABEL: @func2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
+; CHECK-NEXT: ret i16 [[CONV]]
+;
+entry:
+ %cmp.not = icmp eq i64 %y, 0
+ br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %entry
+ %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+ %mul.ov = extractvalue { i64, i1 } %mul, 1
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+ %conv = zext i1 %0 to i16
+ ret i16 %conv
+}
+
+define i16 @noHoist(i64 %x, i64 %y) {
+; CHECK-LABEL: @noHoist(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT: [[ADD2:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD2]], i64 [[X]])
+; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
+; CHECK-NEXT: ret i16 [[CONV]]
+;
+entry:
+ %cmp.not = icmp eq i64 %y, 0
+ br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %entry
+ %add = add nsw i64 %y, %x
+ %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add, i64 %x)
+ %mul.ov = extractvalue { i64, i1 } %mul, 1
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+ %conv = zext i1 %0 to i16
+ ret i16 %conv
+}
>From ea09c0b64c399f8ce1db43447155fc064805ba4f Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Thu, 20 Feb 2025 16:59:08 +0100
Subject: [PATCH 2/5] Fix formatting
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 157716dc7ead9..b9bc32084ab2e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3833,9 +3833,11 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
- AggressiveInsts, Cost, Budget, TTI, AC, ZeroCostInstructions) ||
+ AggressiveInsts, Cost, Budget, TTI, AC,
+ ZeroCostInstructions) ||
!dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
- AggressiveInsts, Cost, Budget, TTI, AC, ZeroCostInstructions))
+ AggressiveInsts, Cost, Budget, TTI, AC,
+ ZeroCostInstructions))
return Changed;
}
>From 7616f45791559778f7cd2a43e6b9edc5949c753b Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Thu, 20 Feb 2025 17:07:04 +0100
Subject: [PATCH 3/5] Extend tests
---
.../SimplifyCFG/umul-extract-pattern.ll | 98 +++++++++++++++++--
1 file changed, 88 insertions(+), 10 deletions(-)
diff --git a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
index 8a5e034998a25..7a01a6f6918fd 100644
--- a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
+++ b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
@@ -2,14 +2,14 @@
; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
target triple = "riscv64-unknown-unknown-elf"
-define i16 @func2(i64 %x, i64 %y) {
-; CHECK-LABEL: @func2(
+define i16 @basicScenario(i64 %x, i64 %y) {
+; CHECK-LABEL: @basicScenario(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
+; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
; CHECK-NEXT: ret i16 [[CONV]]
;
entry:
@@ -27,15 +27,54 @@ land.end: ; preds = %land.rhs, %entry
ret i16 %conv
}
-define i16 @noHoist(i64 %x, i64 %y) {
-; CHECK-LABEL: @noHoist(
+define i16 @samePatternTwice(i64 %x, i64 %y) {
+; CHECK-LABEL: @samePatternTwice(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
-; CHECK-NEXT: [[ADD2:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
-; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD2]], i64 [[X]])
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
+; CHECK: land.rhs:
+; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: [[MUL2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X]])
+; CHECK-NEXT: [[MUL_OV2:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: br label [[LAND_END]]
+; CHECK: land.end:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[MUL_OV2]], [[LAND_RHS]] ]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT: [[CONV2:%.*]] = zext i1 [[TMP1]] to i16
+; CHECK-NEXT: [[TORET:%.*]] = add nsw i16 [[CONV]], [[CONV2]]
+; CHECK-NEXT: ret i16 [[CONV]]
+;
+entry:
+ %cmp.not = icmp eq i64 %y, 0
+ br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %entry
+ %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+ %mul.ov = extractvalue { i64, i1 } %mul, 1
+ %mul2 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+ %mul.ov2 = extractvalue { i64, i1 } %mul, 1
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+ %1 = phi i1 [ false, %entry ], [ %mul.ov2, %land.rhs ]
+ %conv = zext i1 %0 to i16
+ %conv2 = zext i1 %1 to i16
+ %toRet = add nsw i16 %conv, %conv2
+ ret i16 %conv
+}
+
+define i16 @stillHoistNotTooExpensive(i64 %x, i64 %y) {
+; CHECK-LABEL: @stillHoistNotTooExpensive(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD]], i64 [[X]])
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
+; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
; CHECK-NEXT: ret i16 [[CONV]]
;
entry:
@@ -53,3 +92,42 @@ land.end: ; preds = %land.rhs, %entry
%conv = zext i1 %0 to i16
ret i16 %conv
}
+
+define i16 @noHoistTooExpensive(i64 %x, i64 %y) {
+; CHECK-LABEL: @noHoistTooExpensive(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
+; CHECK: land.rhs:
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[ADD2:%.*]] = add nsw i64 [[Y]], [[ADD]]
+; CHECK-NEXT: [[ADD3:%.*]] = add nsw i64 [[ADD]], [[ADD2]]
+; CHECK-NEXT: [[ADD4:%.*]] = add nsw i64 [[ADD2]], [[ADD3]]
+; CHECK-NEXT: [[ADD5:%.*]] = add nsw i64 [[ADD3]], [[ADD4]]
+; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD5]], i64 [[X]])
+; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: br label [[LAND_END]]
+; CHECK: land.end:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT: ret i16 [[CONV]]
+;
+entry:
+ %cmp.not = icmp eq i64 %y, 0
+ br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %entry
+ %add = add nsw i64 %y, %x
+ %add2 = add nsw i64 %y, %add
+ %add3 = add nsw i64 %add, %add2
+ %add4 = add nsw i64 %add2, %add3
+ %add5 = add nsw i64 %add3, %add4
+ %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add5, i64 %x)
+ %mul.ov = extractvalue { i64, i1 } %mul, 1
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+ %conv = zext i1 %0 to i16
+ ret i16 %conv
+}
>From f80ec18374941e41061e4c7281d85a590cdeac55 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Fri, 21 Feb 2025 12:40:30 +0100
Subject: [PATCH 4/5] Fix test case with the same pattern twice
---
.../Transforms/SimplifyCFG/umul-extract-pattern.ll | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
index 7a01a6f6918fd..1dd3019bcb43f 100644
--- a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
+++ b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
@@ -31,16 +31,12 @@ define i16 @samePatternTwice(i64 %x, i64 %y) {
; CHECK-LABEL: @samePatternTwice(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
-; CHECK: land.rhs:
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; CHECK-NEXT: [[MUL2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X]])
-; CHECK-NEXT: [[MUL_OV2:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT: br label [[LAND_END]]
-; CHECK: land.end:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[MUL_OV2]], [[LAND_RHS]] ]
+; CHECK-NEXT: [[MUL_OV2:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV2]]
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i16
; CHECK-NEXT: [[CONV2:%.*]] = zext i1 [[TMP1]] to i16
; CHECK-NEXT: [[TORET:%.*]] = add nsw i16 [[CONV]], [[CONV2]]
@@ -54,7 +50,7 @@ land.rhs: ; preds = %entry
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
%mul.ov = extractvalue { i64, i1 } %mul, 1
%mul2 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
- %mul.ov2 = extractvalue { i64, i1 } %mul, 1
+ %mul.ov2 = extractvalue { i64, i1 } %mul2, 1
br label %land.end
land.end: ; preds = %land.rhs, %entry
>From 11a9145f161fb7856f0d329ed9e19310b226926d Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Fri, 21 Feb 2025 12:47:47 +0100
Subject: [PATCH 5/5] Add a comment in the code trying to explain the situation
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b9bc32084ab2e..f0de8a3a3d248 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -463,6 +463,11 @@ static bool dominatesMergePoint(
if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
return false;
+ // Overflow arithmetic instruction plus extract value are usually generated
+ // when a division is being replaced. But, in this case, the zero check may
+ // still be kept in the code. In that case it would be worth hoisting these
+ // two instructions out of the basic block. Let's treat this pattern as one
+ // single cheap instruction here!
WithOverflowInst *OverflowInst;
if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
ZeroCostInstructions.insert(OverflowInst);
More information about the llvm-commits
mailing list