[llvm] GlobalISel needs fdiv 1 / sqrt(x) to rsq combine (PR #78673)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 8 01:37:17 PST 2024
================
@@ -337,32 +337,24 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
return false;
}
-bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsq(
+bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
MachineInstr &MI) const {
Register Dst = MI.getOperand(0).getReg();
Register Sqrt = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(Dst);
- const MachineFunction &MF = B.getMF();
- bool AllowInaccurateRsq =
- MI.getFlag(MachineInstr::FmAfn) || MF.getTarget().Options.UnsafeFPMath;
if (!MRI.hasOneUse(Sqrt)) {
return false;
}
- // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
- // the CI documentation has a worst case error of 1 ulp.
- // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
- // use it as long as we aren't trying to use denormals.
- //
- // v_rcp_f16 and v_rsq_f16 DO support denormals and 0.51ulp.
- if (!AllowInaccurateRsq && DstTy != LLT::scalar(16)) {
- return false;
+ // f32/f64 rsq is handled in AMDGPUCodeGenPrepare
+ // only match if operand type is f16
+ // v_rsq_f16 supports denormals and 0.51ulp.
+ if (DstTy == LLT::scalar(16)) {
----------------
arsenm wrote:
The type check can be moved into the pattern instead of being done here in the matcher.
https://github.com/llvm/llvm-project/pull/78673
More information about the llvm-commits mailing list