[llvm] [SimplifyCFG] Treat umul + extract pattern as cheap single instruction. (PR #124933)
Gábor Spaits via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 2 15:03:41 PST 2025
https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/124933
>From ef004473d02e867736e6940f05d03959eb480ebd Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Wed, 29 Jan 2025 14:31:20 +0100
Subject: [PATCH 1/9] [Transform] Treat umul + extract pattern as cheap single
instruction.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 22 +++++++++++----
.../SimplifyCFG/umul-extract-pattern.ll | 27 +++++++++++++++++++
2 files changed, 44 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 12dd49da279b9c5..5197db70285cec5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3290,9 +3290,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
Options.HoistLoadsStoresWithCondFaulting;
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
+ InstructionCost BlockCostSoFar = 0;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
EphemeralValueTracker EphTracker;
+ bool PatternFound = false;
for (Instruction &I : reverse(drop_end(*ThenBB))) {
// Skip debug info.
if (isa<DbgInfoIntrinsic>(I)) {
@@ -3329,9 +3331,6 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
else
++SpeculatedInstructions;
- if (SpeculatedInstructions > 1)
- return false;
-
// Don't hoist the instruction if it's unsafe or expensive.
if (!IsSafeCheapLoadStore &&
!isSafeToSpeculativelyExecute(&I, BI, Options.AC) &&
@@ -3339,10 +3338,23 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
(SpeculatedStoreValue =
isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
return false;
- if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
- computeSpeculationCost(&I, TTI) >
+
+ if (match(&I,
+ m_ExtractValue<1>(m_OneUse(
+ m_Intrinsic<Intrinsic::umul_with_overflow>(m_Value())))) &&
+ ThenBB->size() <= 3) {
+ PatternFound = true;
+ }
+
+ BlockCostSoFar += computeSpeculationCost(&I, TTI);
+ if (! PatternFound && !IsSafeCheapLoadStore && !SpeculatedStoreValue &&
+ BlockCostSoFar >
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
+ // If we don't find any pattern, that must be cheap, then only speculatively
+ // execute a single instruction (not counting the terminator).
+ if (!PatternFound && SpeculatedInstructions > 1)
+ return false;
// Store the store speculation candidate.
if (!SpeculatedStore && SpeculatedStoreValue)
diff --git a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
new file mode 100644
index 000000000000000..72610218c314ecb
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
+
+define dso_local signext range(i16 0, 2) i16 @func2(i64 noundef %x, i64 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: @func2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
+; CHECK-NEXT: ret i16 [[CONV]]
+;
+entry:
+ %cmp.not = icmp eq i64 %y, 0
+ br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %entry
+ %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+ %mul.ov = extractvalue { i64, i1 } %mul, 1
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+ %conv = zext i1 %0 to i16
+ ret i16 %conv
+}
>From cfe1d1877bb1f964791dd16f56f8ecadc3d56449 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 30 Jan 2025 14:04:55 +0100
Subject: [PATCH 2/9] Fix older tests too
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 ++-
.../X86/unsigned-multiply-overflow-check.ll | 34 +++++--------------
2 files changed, 10 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 5197db70285cec5..b13e4a1086895d7 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3342,12 +3342,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
if (match(&I,
m_ExtractValue<1>(m_OneUse(
m_Intrinsic<Intrinsic::umul_with_overflow>(m_Value())))) &&
- ThenBB->size() <= 3) {
+ ThenBB->size() <= 3)
PatternFound = true;
- }
BlockCostSoFar += computeSpeculationCost(&I, TTI);
- if (! PatternFound && !IsSafeCheapLoadStore && !SpeculatedStoreValue &&
+ if (!PatternFound && !IsSafeCheapLoadStore && !SpeculatedStoreValue &&
BlockCostSoFar >
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
index 7bcb6ce17df0e40..9858591dfc7006d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
@@ -45,26 +45,17 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGONLY: bb2:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGONLY: bb5:
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[MUL_OV]]
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[MUL_OV]]
;
bb:
%t0 = icmp eq i64 %arg, 0
@@ -112,28 +103,19 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGONLY: bb2:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGONLY: bb5:
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[PHI_BO]]
;
bb:
%t0 = icmp eq i64 %arg, 0
>From 57b31472c714cd4dce7c2f75a839094d5d3267b7 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 31 Jan 2025 22:04:58 +0100
Subject: [PATCH 3/9] Add a max instructions to be hoisted limit dependent on
the found patterns
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b13e4a1086895d7..c530db99f4db9c4 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3286,7 +3286,21 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
+ // The number of already examined instructions. Debug instructions don't
+ // count!
unsigned SpeculatedInstructions = 0;
+ // By default the number of instructions that may be speculatively executed is
+ // one. Whenever a pattern that is known to be cheap is found in the basic
+ // block, we increase this number to the size of the pattern (how many
+ // instructions there are in that pattern).
+ unsigned MaxSpeculatedInstructionsToHoist = 1;
+ // In case we have found a cheap pattern, we don't want to do cost checking
+ // anymore. We are sure we want to hoist the pattern. To know, that we are
+ // only hoisting the cheap pattern only and not other expensive instructions
+ // too, we have the `MaxSpeculatedInstructionsToHoist` variable to track that
+ // the basic block truly only contains that pattern.
+ bool PatternFound = false;
+
bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
Options.HoistLoadsStoresWithCondFaulting;
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
@@ -3294,7 +3308,6 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
EphemeralValueTracker EphTracker;
- bool PatternFound = false;
for (Instruction &I : reverse(drop_end(*ThenBB))) {
// Skip debug info.
if (isa<DbgInfoIntrinsic>(I)) {
@@ -3341,9 +3354,10 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
if (match(&I,
m_ExtractValue<1>(m_OneUse(
- m_Intrinsic<Intrinsic::umul_with_overflow>(m_Value())))) &&
- ThenBB->size() <= 3)
+ m_Intrinsic<Intrinsic::umul_with_overflow>(m_Value()))))) {
+ MaxSpeculatedInstructionsToHoist = 3;
PatternFound = true;
+ }
BlockCostSoFar += computeSpeculationCost(&I, TTI);
if (!PatternFound && !IsSafeCheapLoadStore && !SpeculatedStoreValue &&
@@ -3352,7 +3366,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
return false;
// If we don't find any pattern, that must be cheap, then only speculatively
// execute a single instruction (not counting the terminator).
- if (!PatternFound && SpeculatedInstructions > 1)
+ if (!PatternFound &&
+ SpeculatedInstructions > MaxSpeculatedInstructionsToHoist)
return false;
// Store the store speculation candidate.
>From b57135a8e53e0593e0b87f239e5869fbed66ec18 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 31 Jan 2025 22:08:44 +0100
Subject: [PATCH 4/9] Remove redundant check
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c530db99f4db9c4..443abecb7827048 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3364,10 +3364,10 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
BlockCostSoFar >
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
- // If we don't find any pattern, that must be cheap, then only speculatively
- // execute a single instruction (not counting the terminator).
- if (!PatternFound &&
- SpeculatedInstructions > MaxSpeculatedInstructionsToHoist)
+
+ // The number of instructions to be speculatively executed is limited.
+ // This limit is dependent on the found patterns.
+ if (SpeculatedInstructions > MaxSpeculatedInstructionsToHoist)
return false;
// Store the store speculation candidate.
>From db01e7f4856b30188c7bbc7e1167612d5e1fbb95 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sat, 1 Feb 2025 09:27:54 +0100
Subject: [PATCH 5/9] Don't accumulate instruction cost
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 443abecb7827048..7a203f0967546b3 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3299,12 +3299,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
// only hoisting the cheap pattern only and not other expensive instructions
// too, we have the `MaxSpeculatedInstructionsToHoist` variable to track that
// the basic block truly only contains that pattern.
- bool PatternFound = false;
+ bool PartialInst = false;
bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
Options.HoistLoadsStoresWithCondFaulting;
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
- InstructionCost BlockCostSoFar = 0;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
EphemeralValueTracker EphTracker;
@@ -3339,8 +3338,15 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
HoistLoadsStoresWithCondFaultingThreshold;
// Not count load/store into cost if target supports conditional faulting
// b/c it's cheap to speculate it.
+ WithOverflowInst *OI;
if (IsSafeCheapLoadStore)
SpeculatedConditionalLoadsStores.push_back(&I);
+ else if (match(&I,
+ m_ExtractValue<1>(m_OneUse(
+ m_WithOverflowInst(OI))))) {
+ MaxSpeculatedInstructionsToHoist = 3;
+ PartialInst = true;
+ }
else
++SpeculatedInstructions;
@@ -3352,16 +3358,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
return false;
- if (match(&I,
- m_ExtractValue<1>(m_OneUse(
- m_Intrinsic<Intrinsic::umul_with_overflow>(m_Value()))))) {
- MaxSpeculatedInstructionsToHoist = 3;
- PatternFound = true;
- }
-
- BlockCostSoFar += computeSpeculationCost(&I, TTI);
- if (!PatternFound && !IsSafeCheapLoadStore && !SpeculatedStoreValue &&
- BlockCostSoFar >
+ if (!PartialInst && !IsSafeCheapLoadStore && !SpeculatedStoreValue &&
+ computeSpeculationCost(&I, TTI) >
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
>From 19bbea2d5805d17e99a5e1015773d945c2b879e7 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sun, 2 Feb 2025 20:41:55 +0100
Subject: [PATCH 6/9] Use correct pattern length, count the matched instruction
and test the case when more instructions are present in the basic block
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 19 ++++----
.../SimplifyCFG/umul-extract-pattern.ll | 48 +++++++++++++++++++
2 files changed, 58 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7a203f0967546b3..cf5286543e58e05 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3330,23 +3330,24 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
if (EphTracker.track(&I))
continue;
- // Only speculatively execute a single instruction (not counting the
- // terminator) for now.
bool IsSafeCheapLoadStore = HoistLoadsStores &&
isSafeCheapLoadStore(&I, TTI) &&
SpeculatedConditionalLoadsStores.size() <
HoistLoadsStoresWithCondFaultingThreshold;
+
+ // Overflow arithmetic instruction plus extract value are usually generated
+ // when a division is being replaced. In that case hoist these two
+ // instructions out of this basic block, and let later optimizations
+ // take care of the unnecesary zero checks.
+ WithOverflowInst *OverflowI;
+ if (match(&I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowI))))) {
+ MaxSpeculatedInstructionsToHoist = 2;
+ PartialInst = true;
+ }
// Not count load/store into cost if target supports conditional faulting
// b/c it's cheap to speculate it.
- WithOverflowInst *OI;
if (IsSafeCheapLoadStore)
SpeculatedConditionalLoadsStores.push_back(&I);
- else if (match(&I,
- m_ExtractValue<1>(m_OneUse(
- m_WithOverflowInst(OI))))) {
- MaxSpeculatedInstructionsToHoist = 3;
- PartialInst = true;
- }
else
++SpeculatedInstructions;
diff --git a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
index 72610218c314ecb..3645d4dc9fd28b8 100644
--- a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
+++ b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
@@ -25,3 +25,51 @@ land.end: ; preds = %land.rhs, %entry
%conv = zext i1 %0 to i16
ret i16 %conv
}
+
+define dso_local signext range(i32 0, 1024) i32 @noHoist(i64 noundef %x, i64 noundef %y) local_unnamed_addr #0 {
+; CHECK-LABEL: @noHoist(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
+; CHECK: land.rhs:
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP1]]
+; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[ADD]]
+; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: br label [[LAND_END]]
+; CHECK: land.end:
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 21, [[ENTRY]] ], [ [[ADD]], [[LAND_RHS]] ]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i32
+; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[TMP3]], [[CONV]]
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %i = alloca i32, align 4
+ %a = alloca i32, align 4
+ %0 = load i32, ptr %i, align 4
+ %1 = load i32, ptr %a, align 4
+
+ %cmp.not = icmp eq i64 %y, 0
+ br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %entry
+ %add = add nsw i32 %0, %1
+ %add2 = add nsw i32 %add, %1
+ %add3 = add nsw i32 %add2, %add
+ %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+ %mul.ov = extractvalue { i64, i1 } %mul, 1
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %42 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+ %99 = phi i32 [ 21, %entry ], [ %add, %land.rhs]
+ %conv = zext i1 %42 to i32
+ %res = add nsw i32 %99, %conv
+ ret i32 %conv
+}
>From ad008706c16d18491538fcfbe53dce734aacd4f6 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sun, 2 Feb 2025 22:16:24 +0100
Subject: [PATCH 7/9] Fix a typo
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index cf5286543e58e05..63f7029885fa6a0 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3336,9 +3336,9 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
HoistLoadsStoresWithCondFaultingThreshold;
// Overflow arithmetic instruction plus extract value are usually generated
- // when a division is being replaced. In that case hoist these two
- // instructions out of this basic block, and let later optimizations
- // take care of the unnecesary zero checks.
+ // when a division is being replaced, but the zero check may still be there.
+ // In that case hoist these two instructions out of this basic block, and
+ // let later optimizations take care of the unnecessary zero checks.
WithOverflowInst *OverflowI;
if (match(&I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowI))))) {
MaxSpeculatedInstructionsToHoist = 2;
>From 56fc65c323b008122f7a34b78c96d7ce89ce7fd1 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sun, 2 Feb 2025 22:29:26 +0100
Subject: [PATCH 8/9] Simplify a test
---
llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
index 3645d4dc9fd28b8..e1afd0e279ecf74 100644
--- a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
+++ b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
@@ -38,7 +38,6 @@ define dso_local signext range(i32 0, 1024) i32 @noHoist(i64 noundef %x, i64 nou
; CHECK: land.rhs:
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP1]]
-; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[ADD]]
; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; CHECK-NEXT: br label [[LAND_END]]
@@ -61,7 +60,6 @@ entry:
land.rhs: ; preds = %entry
%add = add nsw i32 %0, %1
%add2 = add nsw i32 %add, %1
- %add3 = add nsw i32 %add2, %add
%mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
%mul.ov = extractvalue { i64, i1 } %mul, 1
br label %land.end
>From 6dade290797ae1852c8c1f816a3c856b1493dfdd Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 3 Feb 2025 00:02:47 +0100
Subject: [PATCH 9/9] Simplify test
---
.../SimplifyCFG/umul-extract-pattern.ll | 43 ++++++-------------
1 file changed, 12 insertions(+), 31 deletions(-)
diff --git a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
index e1afd0e279ecf74..2c54d02c136788b 100644
--- a/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
+++ b/llvm/test/Transforms/SimplifyCFG/umul-extract-pattern.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
-define dso_local signext range(i16 0, 2) i16 @func2(i64 noundef %x, i64 noundef %y) local_unnamed_addr #0 {
+define i16 @func2(i64 %x, i64 %y) {
; CHECK-LABEL: @func2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
@@ -26,48 +26,29 @@ land.end: ; preds = %land.rhs, %entry
ret i16 %conv
}
-define dso_local signext range(i32 0, 1024) i32 @noHoist(i64 noundef %x, i64 noundef %y) local_unnamed_addr #0 {
+define i16 @noHoist(i64 %x, i64 %y) {
; CHECK-LABEL: @noHoist(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
-; CHECK: land.rhs:
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP1]]
-; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT: [[ADD2:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD2]], i64 [[X]])
; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT: br label [[LAND_END]]
-; CHECK: land.end:
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 21, [[ENTRY]] ], [ [[ADD]], [[LAND_RHS]] ]
-; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i32
-; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[TMP3]], [[CONV]]
-; CHECK-NEXT: ret i32 [[CONV]]
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[SPEC_SELECT]] to i16
+; CHECK-NEXT: ret i16 [[CONV]]
;
entry:
- %i = alloca i32, align 4
- %a = alloca i32, align 4
- %0 = load i32, ptr %i, align 4
- %1 = load i32, ptr %a, align 4
-
%cmp.not = icmp eq i64 %y, 0
br i1 %cmp.not, label %land.end, label %land.rhs
land.rhs: ; preds = %entry
- %add = add nsw i32 %0, %1
- %add2 = add nsw i32 %add, %1
- %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+ %add = add nsw i64 %y, %x
+ %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add, i64 %x)
%mul.ov = extractvalue { i64, i1 } %mul, 1
br label %land.end
land.end: ; preds = %land.rhs, %entry
- %42 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
- %99 = phi i32 [ 21, %entry ], [ %add, %land.rhs]
- %conv = zext i1 %42 to i32
- %res = add nsw i32 %99, %conv
- ret i32 %conv
+ %0 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+ %conv = zext i1 %0 to i16
+ ret i16 %conv
}
More information about the llvm-commits
mailing list