[llvm] [AMDGPU] [GlobalIsel] Combine Fmul with Select into ldexp instruction. (PR #120104)
Vikash Gupta via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 08:18:13 PST 2024
https://github.com/vg0204 created https://github.com/llvm/llvm-project/pull/120104
This combine pattern performs the following transformations:
fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b))
fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b))
where, A=2^a & B=2^b ; a and b are integers.
This is a follow-up PR that implements the above combine for GlobalISel; the corresponding DAG combine has already been implemented for SelectionDAG ISel (#111109).
>From 5cb5389956552f24abf26244b29b232d3b60e23b Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 16 Dec 2024 16:10:02 +0000
Subject: [PATCH] [AMDGPU] [GlobalIsel] Combine Fmul with Select into ldexp.
This combine pattern performs the following transformations:
fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b))
fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b))
where, A=2^a & B=2^b ; a and b are integers.
This is a follow-up PR that implements the above combine for GlobalISel,
as has already been done for SelectionDAG ISel (PR-111109).
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 10 +++
.../Target/AMDGPU/AMDGPUCombinerHelper.cpp | 72 +++++++++++++++++++
llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h | 4 ++
3 files changed, 86 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 985fa8f1deff94..c1eea0ad9b7073 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -124,6 +124,16 @@ def sign_extension_in_reg : GICombineRule<
[{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]),
(apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;
+// Combine fmul with a select of constants A = 2^a, B = 2^b (a, b integers):
+// fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b))
+// fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b))
+def combine_fmul_with_select_to_ldexp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_FMUL $dst, $x, $select):$root,
+ (G_SELECT $select, $y, $A, $B):$sel, // $select ties the select's result to the fmul's second source operand.
+ [{ return Helper.matchCombineFmulWithSelectToLdexp(*${root}, *${sel}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index e5a376ab7357c1..d582ee892a481e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -445,3 +445,75 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
MI.eraseFromParent();
}
+// Matches fmul x, select(y, A, B) where A and B are same-signed FP constants whose magnitudes are exact powers of two (2^a, 2^b); on success MatchInfo builds ldexp(x or fneg x, select(y, a, b)) and true is returned.
+bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToLdexp(
+ MachineInstr &MI, MachineInstr &Sel, // MI: the G_FMUL root; Sel: the G_SELECT (both asserted below).
+ std::function<void(MachineIRBuilder &)> &MatchInfo) { // Out: rewrite callback, assigned only on a successful match.
+ assert(MI.getOpcode() == TargetOpcode::G_FMUL);
+ assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DestTy = MRI.getType(Dst);
+ LLT ScalarDestTy = DestTy.getScalarType();
+ // Only f16/f32/f64 results (scalar, or vector with such elements) qualify, and only if the select's result has a single non-debug use.
+ if ((ScalarDestTy == LLT::float64() || ScalarDestTy == LLT::float32() ||
+ ScalarDestTy == LLT::float16()) &&
+ (MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))) {
+ Register SelectCond = Sel.getOperand(1).getReg();
+ Register SelectTrue = Sel.getOperand(2).getReg();
+ Register SelectFalse = Sel.getOperand(3).getReg();
+ // Both arms must be FP constants: vectors are queried via getFConstantSplat (allowUndef set), scalars via getFConstantVRegValWithLookThrough.
+ const auto SelectTrueCst =
+ DestTy.isVector()
+ ? getFConstantSplat(SelectTrue, MRI, /* allowUndef */ true)
+ : getFConstantVRegValWithLookThrough(SelectTrue, MRI);
+ if (!SelectTrueCst)
+ return false;
+ const auto SelectFalseCst =
+ DestTy.isVector()
+ ? getFConstantSplat(SelectFalse, MRI, /* allowUndef */ true)
+ : getFConstantVRegValWithLookThrough(SelectFalse, MRI);
+ if (!SelectFalseCst)
+ return false;
+ // The arms must agree in sign: the rewrite applies at most one fneg to x, which covers both arms at once.
+ if (SelectTrueCst->Value.isNegative() != SelectFalseCst->Value.isNegative())
+ return false;
+
+ // For f32, skip the rewrite when both constants are inline constants; transform only if at least one is not.
+ const SIInstrInfo *TII =
+ (MI.getMF()->getSubtarget<GCNSubtarget>()).getInstrInfo();
+ if (ScalarDestTy == LLT::float32() &&
+ TII->isInlineConstant(SelectTrueCst->Value) &&
+ TII->isInlineConstant(SelectFalseCst->Value))
+ return false;
+ // Exponents a and b: INT_MIN from getExactLog2Abs() is treated as "magnitude is not an exact power of two" and rejects the match.
+ int SelectTrueVal = SelectTrueCst->Value.getExactLog2Abs();
+ if (SelectTrueVal == INT_MIN)
+ return false;
+ int SelectFalseVal = SelectFalseCst->Value.getExactLog2Abs();
+ if (SelectFalseVal == INT_MIN)
+ return false;
+ // Deferred rewrite: locals are captured by value, MI by reference. NOTE(review): MRI is not a local, so it is presumably reached via the implicitly captured `this` - confirm the helper outlives MatchInfo.
+ MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
+ LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32)); // Exponent operand: DestTy with its element type replaced by a 32-bit scalar.
+ auto NewSel =
+ Builder.buildSelect(IntDestTy, SelectCond,
+ Builder.buildConstant(IntDestTy, SelectTrueVal),
+ Builder.buildConstant(IntDestTy, SelectFalseVal));
+ // Negative constants: fold the sign into x with an fneg that reuses the flags of x's defining instruction. NOTE(review): confirm copying those flags is intended.
+ if (SelectTrueCst->Value.isNegative()) {
+ auto NegX = Builder.buildFNeg(
+ DestTy, MI.getOperand(1).getReg(),
+ MRI.getVRegDef(MI.getOperand(1).getReg())->getFlags());
+ Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
+ } else {
+ Builder.buildFLdexp(Dst, MI.getOperand(1).getReg(), NewSel,
+ MI.getFlags());
+ }
+ };
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
index 6510abe9d23218..df03a9435b3849 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -30,6 +30,10 @@ class AMDGPUCombinerHelper : public CombinerHelper {
Register Src1, Register Src2);
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
Register Src1, Register Src2);
+ /// Match fmul x, select(y, A, B) with same-signed power-of-two FP constants A and B; on success fills \p MatchInfo with a builder that emits the ldexp form and returns true.
+ bool matchCombineFmulWithSelectToLdexp(
+ MachineInstr &MI, MachineInstr &Sel,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
};
} // namespace llvm
More information about the llvm-commits
mailing list