[llvm] 4c48b3c - [GISel][CombinerHelper] Push freeze through non-poison-producing operands (#90618)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 23 01:15:57 PDT 2024
Author: Dhruv Chawla
Date: 2024-05-23T13:45:52+05:30
New Revision: 4c48b3cb5c681b34345e0dffe920649a5c26aaf4
URL: https://github.com/llvm/llvm-project/commit/4c48b3cb5c681b34345e0dffe920649a5c26aaf4
DIFF: https://github.com/llvm/llvm-project/commit/4c48b3cb5c681b34345e0dffe920649a5c26aaf4.diff
LOG: [GISel][CombinerHelper] Push freeze through non-poison-producing operands (#90618)
This combine matches the existing fold in InstCombine, i.e.
InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
It tries to push freeze through an operand if the operand has only one
maybe-poison operand and all other operands are guaranteed non-poison,
and if the operation itself cannot generate poison (eg. add with nsw can
generate poison, even with non-poison operands).
This is beneficial because it can potentially enable other optimizations
to occur that would otherwise be blocked because of the freeze.
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
llvm/include/llvm/CodeGen/MachineInstr.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
llvm/test/CodeGen/AArch64/pr58431.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index a9a33c7617d7d..2111e82e1a99d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -869,6 +869,9 @@ class CombinerHelper {
/// Combine insert vector element OOB.
bool matchInsertVectorElementOOB(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 2a3145b635e6c..2b3efc3b609f0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -34,6 +34,17 @@ class GenericMachineInstr : public MachineInstr {
static bool classof(const MachineInstr *MI) {
return isPreISelGenericOpcode(MI->getOpcode());
}
+
+ bool hasPoisonGeneratingFlags() const {
+ return getFlags() & (NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg |
+ FmNoNans | FmNoInfs);
+ }
+
+ void dropPoisonGeneratingFlags() {
+ clearFlags(NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg | FmNoNans |
+ FmNoInfs);
+ assert(!hasPoisonGeneratingFlags());
+ }
};
/// Provides common memory operand functionality.
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 2b0c5d166d88b..db48a0ae55145 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -416,6 +416,12 @@ class MachineInstr
Flags &= ~((uint32_t)Flag);
}
+ void clearFlags(unsigned flags) {
+ assert(isUInt<LLVM_MI_FLAGS_BITS>(flags) &&
+ "flags to be cleared are out of range for the Flags field");
+ Flags &= ~flags;
+ }
+
/// Return true if MI is in a bundle (but not the first MI in a bundle).
///
/// A bundle looks like this before it's finalized:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5d4b5a2479f6a..34698f195615b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -220,6 +220,13 @@ def idempotent_prop : GICombineRule<
(match (idempotent_prop_frags $dst, $src)),
(apply (GIReplaceReg $dst, $src))>;
+// Convert freeze(Op(Op0, NonPoisonOps...)) to Op(freeze(Op0), NonPoisonOps...)
+// when Op0 is not guaranteed non-poison
+def push_freeze_to_prevent_poison_from_propagating : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_FREEZE $dst, $src):$root,
+ [{ return !isGuaranteedNotToBePoison(${src}.getReg(), MRI) && Helper.matchFreezeOfSingleMaybePoisonOperand(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
def extending_loads : GICombineRule<
(defs root:$root, extending_load_matchdata:$matchinfo),
@@ -1713,7 +1720,8 @@ def all_combines : GICombineGroup<[trivial_combines, vector_ops_combines,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
- sext_trunc, zext_trunc, combine_shuffle_concat]>;
+ sext_trunc, zext_trunc, combine_shuffle_concat,
+ push_freeze_to_prevent_poison_from_propagating]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 22eb4a3e0d7cb..4cc602b5c8709 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -223,6 +223,70 @@ void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
replaceRegWith(MRI, DstReg, SrcReg);
}
+bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
+ MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
+ Register DstOp = MI.getOperand(0).getReg();
+ Register OrigOp = MI.getOperand(1).getReg();
+
+ if (!MRI.hasOneNonDBGUse(OrigOp))
+ return false;
+
+ MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
+ // Even if only a single operand of the PHI is not guaranteed non-poison,
+ // moving freeze() backwards across a PHI can cause optimization issues for
+ // other users of that operand.
+ //
+ // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
+ // the source register is unprofitable because it makes the freeze() more
+ // strict than is necessary (it would affect the whole register instead of
+ // just the subreg being frozen).
+ if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
+ return false;
+
+ if (canCreateUndefOrPoison(OrigOp, MRI,
+ /*ConsiderFlagsAndMetadata=*/false))
+ return false;
+
+ std::optional<MachineOperand> MaybePoisonOperand;
+ for (MachineOperand &Operand : OrigDef->uses()) {
+ if (!Operand.isReg())
+ return false;
+
+ if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
+ continue;
+
+ if (!MaybePoisonOperand)
+ MaybePoisonOperand = Operand;
+ else {
+ // We have more than one maybe-poison operand. Moving the freeze is
+ // unsafe.
+ return false;
+ }
+ }
+
+ cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+
+ // Eliminate freeze if all operands are guaranteed non-poison.
+ if (!MaybePoisonOperand) {
+ MatchInfo = [=](MachineIRBuilder &B) { MRI.replaceRegWith(DstOp, OrigOp); };
+ return true;
+ }
+
+ Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
+ LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
+
+ MatchInfo = [=](MachineIRBuilder &B) mutable {
+ B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
+ auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
+ replaceRegOpWith(
+ MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
+ Freeze.getReg(0));
+ replaceRegWith(MRI, DstOp, OrigOp);
+ };
+ return true;
+}
+
bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
SmallVector<Register> &Ops) {
assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cd5dc0e01ed0e..f455482e02943 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1745,11 +1745,20 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
UndefPoisonKind Kind) {
MachineInstr *RegDef = MRI.getVRegDef(Reg);
+ if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef)) {
+ if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
+ GMI->hasPoisonGeneratingFlags())
+ return true;
+ } else {
+ // Conservatively return true.
+ return true;
+ }
+
switch (RegDef->getOpcode()) {
case TargetOpcode::G_FREEZE:
return false;
default:
- return true;
+ return !isa<GCastOp>(RegDef) && !isa<GBinOp>(RegDef);
}
}
@@ -1767,8 +1776,17 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
return true;
case TargetOpcode::G_IMPLICIT_DEF:
return !includesUndef(Kind);
- default:
- return false;
+ default: {
+ auto MOCheck = [&](const MachineOperand &MO) {
+ if (!MO.isReg())
+ return true;
+ return ::isGuaranteedNotToBeUndefOrPoison(MO.getReg(), MRI, Depth + 1,
+ Kind);
+ };
+ return !::canCreateUndefOrPoison(Reg, MRI,
+ /*ConsiderFlagsAndMetadata=*/true, Kind) &&
+ all_of(RegDef->uses(), MOCheck);
+ }
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 10cad6d192440..1c7f6b870d390 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -295,5 +295,6 @@ def AArch64PostLegalizerCombiner
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
- commute_constant_to_rhs]> {
+ commute_constant_to_rhs,
+ push_freeze_to_prevent_poison_from_propagating]> {
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 353c1550d6974..074d4ecbd8785 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -117,9 +117,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f
- ; CHECK-NEXT: %sel:_(s1) = G_OR %c, [[FREEZE]]
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
+ ; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -144,9 +144,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f
- ; CHECK-NEXT: %sel:_(s1) = G_OR %c, [[FREEZE]]
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
+ ; CHECK-NEXT: %sel:_(s1) = G_OR %c, %f
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -172,9 +172,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d2
; CHECK-NEXT: %c:_(<2 x s1>) = G_TRUNC [[COPY]](<2 x s32>)
- ; CHECK-NEXT: %f:_(<2 x s1>) = G_TRUNC [[COPY1]](<2 x s32>)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s1>) = G_FREEZE %f
- ; CHECK-NEXT: %sel:_(<2 x s1>) = G_OR %c, [[FREEZE]]
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: %f:_(<2 x s1>) = G_TRUNC [[FREEZE]](<2 x s32>)
+ ; CHECK-NEXT: %sel:_(<2 x s1>) = G_OR %c, %f
; CHECK-NEXT: %ext:_(<2 x s32>) = G_ANYEXT %sel(<2 x s1>)
; CHECK-NEXT: $d0 = COPY %ext(<2 x s32>)
%0:_(<2 x s32>) = COPY $d0
@@ -201,9 +201,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t
- ; CHECK-NEXT: %sel:_(s1) = G_AND %c, [[FREEZE]]
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64)
+ ; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -229,9 +229,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t
- ; CHECK-NEXT: %sel:_(s1) = G_AND %c, [[FREEZE]]
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64)
+ ; CHECK-NEXT: %sel:_(s1) = G_AND %c, %t
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -257,11 +257,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[FREEZE]](s64)
; CHECK-NEXT: %one:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, %one
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %t
- ; CHECK-NEXT: %sel:_(s1) = G_OR [[XOR]], [[FREEZE]]
+ ; CHECK-NEXT: %sel:_(s1) = G_OR [[XOR]], %t
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
@@ -287,11 +287,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: %c:_(s1) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]]
+ ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[FREEZE]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR %c, [[C]]
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %f
- ; CHECK-NEXT: %sel:_(s1) = G_AND [[XOR]], [[FREEZE]]
+ ; CHECK-NEXT: %sel:_(s1) = G_AND [[XOR]], %f
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
; CHECK-NEXT: $w0 = COPY %ext(s32)
%0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll
index dcd97597ae409..e87d8f7874d62 100644
--- a/llvm/test/CodeGen/AArch64/pr58431.ll
+++ b/llvm/test/CodeGen/AArch64/pr58431.ll
@@ -4,8 +4,8 @@
define i32 @f(i64 %0) {
; CHECK-LABEL: f:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #10
-; CHECK-NEXT: mov w9, w0
+; CHECK-NEXT: mov w8, #10 // =0xa
+; CHECK-NEXT: and x9, x0, #0xffffffff
; CHECK-NEXT: udiv x10, x9, x8
; CHECK-NEXT: msub x0, x10, x8, x9
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
More information about the llvm-commits
mailing list