[llvm] 8ee1cc8 - AMDGPU: Fold out sign bit ops on frexp_exp
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 6 07:26:26 PDT 2023
Author: Matt Arsenault
Date: 2023-07-06T10:26:21-04:00
New Revision: 8ee1cc82c9e54902a495204a537d2152b68ec757
URL: https://github.com/llvm/llvm-project/commit/8ee1cc82c9e54902a495204a537d2152b68ec757
DIFF: https://github.com/llvm/llvm-project/commit/8ee1cc82c9e54902a495204a537d2152b68ec757.diff
LOG: AMDGPU: Fold out sign bit ops on frexp_exp
The sign bit has no impact on the exponent, so strip these away. Saves
on the source modifier encoding cost. I left the GlobalISel handling
until there's a resolution to issue #62628.
We should do this in InstCombine too, but legalization should be
introducing more frexps than it currently does, which is where this fold would apply.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8d125889121759..3ac37f3cad0756 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1496,6 +1496,17 @@ static SDValue peekFNeg(SDValue Val) {
return Val;
}
+
+static SDValue peekFPSignOps(SDValue Val) { // Look through sign-only FP ops wrapping Val.
+ if (Val.getOpcode() == ISD::FNEG) // Outermost: drop an fneg, keep its source.
+ Val = Val.getOperand(0);
+ if (Val.getOpcode() == ISD::FABS) // Next: drop an fabs under that.
+ Val = Val.getOperand(0);
+ if (Val.getOpcode() == ISD::FCOPYSIGN) // Last: fcopysign -> operand 0 (the value whose sign gets replaced).
+ Val = Val.getOperand(0);
+ return Val; // Unchanged Val if nothing matched; each check runs once, in this order only.
+}
+
SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
@@ -3664,6 +3675,17 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
SDValue Src = N->getOperand(1);
return Src.isUndef() ? Src : SDValue();
}
+ case Intrinsic::amdgcn_frexp_exp: {
+ // frexp_exp (fneg x) -> frexp_exp x
+ // frexp_exp (fabs x) -> frexp_exp x
+ // frexp_exp (fneg (fabs x)) -> frexp_exp x
+ SDValue Src = N->getOperand(1);
+ SDValue PeekSign = peekFPSignOps(Src);
+ if (PeekSign == Src)
+ return SDValue();
+ return SDValue(DCI.DAG.UpdateNodeOperands(N, N->getOperand(0), PeekSign),
+ 0);
+ }
default:
return SDValue();
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
index fc4b75ba1bf9fd..9fd47cf8078cb8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
@@ -2,6 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare float @llvm.fabs.f32(float) #0
+declare float @llvm.copysign.f32(float, float) #0
declare double @llvm.fabs.f64(double) #0
declare i32 @llvm.amdgcn.frexp.exp.i32.f32(float) #0
declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) #0
@@ -15,7 +16,7 @@ define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %sr
}
; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f32:
-; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}|
+; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
%fabs.src = call float @llvm.fabs.f32(float %src)
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fabs.src)
@@ -24,7 +25,7 @@ define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, floa
}
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f32:
-; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}|
+; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
%fabs.src = call float @llvm.fabs.f32(float %src)
%fneg.fabs.src = fneg float %fabs.src
@@ -33,6 +34,15 @@ define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out,
ret void
}
+; GCN-LABEL: {{^}}s_test_copysign_frexp_exp_f32:
+; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define amdgpu_kernel void @s_test_copysign_frexp_exp_f32(ptr addrspace(1) %out, float %src, float %sign) #1 {
+ %copysign = call float @llvm.copysign.f32(float %src, float %sign) ; sign-only op applied to the frexp_exp source
+ %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %copysign) ; the check line above expects the e32 form with a bare SGPR source
+ store i32 %frexp.exp, ptr addrspace(1) %out
+ ret void
+}
+
; GCN-LABEL: {{^}}s_test_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
@@ -42,7 +52,7 @@ define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %s
}
; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f64:
-; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, |{{s\[[0-9]+:[0-9]+\]}}|
+; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
%fabs.src = call double @llvm.fabs.f64(double %src)
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fabs.src)
@@ -51,7 +61,7 @@ define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, doub
}
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f64:
-; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, -|{{s\[[0-9]+:[0-9]+\]}}|
+; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
%fabs.src = call double @llvm.fabs.f64(double %src)
%fneg.fabs.src = fneg double %fabs.src
More information about the llvm-commits mailing list