[llvm] c177051 - AMDGPU: Restrict foldFreeOpFromSelect combine based on legal source mods
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 19 18:06:00 PST 2023
Author: Matt Arsenault
Date: 2023-02-19T22:05:54-04:00
New Revision: c177051f60e89b16533f5fca1db033507862b386
URL: https://github.com/llvm/llvm-project/commit/c177051f60e89b16533f5fca1db033507862b386
DIFF: https://github.com/llvm/llvm-project/commit/c177051f60e89b16533f5fca1db033507862b386.diff
LOG: AMDGPU: Restrict foldFreeOpFromSelect combine based on legal source mods
Provides a small code size savings for some f32 cases.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index c74be2d19cfd4..0aa6b22e2f9ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -597,6 +597,14 @@ static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
VT == MVT::f64;
}
+/// Return true if the result type of the ISD::SELECT node \p N is one for which
+/// v_cndmask_b32 supports fabs/fneg source modifiers (currently only f32).
+LLVM_READONLY
+static bool selectSupportsSourceMods(const SDNode *N) {
+ // TODO: Only applies if select will be vector
+ return N->getValueType(0) == MVT::f32;
+}
+
// Most FP instructions support source modifiers, but this could be refined
// slightly.
LLVM_READONLY
@@ -631,8 +639,7 @@ static bool hasSourceMods(const SDNode *N) {
}
}
case ISD::SELECT:
- // TODO: Only applies if select will be vector
- return N->getValueType(0) == MVT::f32;
+ return selectSupportsSourceMods(N);
default:
return true;
}
@@ -3758,7 +3765,8 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
// TODO: Support vector constants.
ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
- if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
+ if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS &&
+ !selectSupportsSourceMods(N.getNode())) {
SDLoc SL(N);
// If one side is an fneg/fabs and the other is a constant, we can push the
// fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
index 3e2d5f6ab6801..e76f6024cc03f 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -205,8 +205,8 @@ define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
-; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
@@ -225,8 +225,8 @@ define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
-; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
@@ -677,7 +677,7 @@ define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
%x = load volatile float, ptr addrspace(1) undef
@@ -696,8 +696,8 @@ define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: s_cmp_lg_u32
-; GCN: s_cselect_b64 s[0:1], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s[0:1]
+; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
%x = load volatile float, ptr addrspace(1) undef
@@ -953,8 +953,9 @@ define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 {
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0, [[X]], vcc
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
+; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
@@ -971,10 +972,11 @@ define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
+; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN: s_cmp_lg_u32
-; GCN: s_cselect_b64 vcc, -1, 0
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0, [[X]], vcc
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
+; GCN: s_cselect_b64 s[0:1], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s[0:1]
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
More information about the llvm-commits
mailing list