[llvm] c177051 - AMDGPU: Restrict foldFreeOpFromSelect combine based on legal source mods

Sun Feb 19 18:06:00 PST 2023

Author: Matt Arsenault
Date: 2023-02-19T22:05:54-04:00
New Revision: c177051f60e89b16533f5fca1db033507862b386

URL: https://github.com/llvm/llvm-project/commit/c177051f60e89b16533f5fca1db033507862b386
DIFF: https://github.com/llvm/llvm-project/commit/c177051f60e89b16533f5fca1db033507862b386.diff

LOG: AMDGPU: Restrict foldFreeOpFromSelect combine based on legal source mods

Provides a small code size savings for some f32 cases.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index c74be2d19cfd4..0aa6b22e2f9ce 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -597,6 +597,14 @@ static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
          VT == MVT::f64;
 }
 
+/// Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the
+/// type for ISD::SELECT.
+LLVM_READONLY
+static bool selectSupportsSourceMods(const SDNode *N) {
+  // TODO: Only applies if select will be vector
+  return N->getValueType(0) == MVT::f32;
+}
+
 // Most FP instructions support source modifiers, but this could be refined
 // slightly.
 LLVM_READONLY
@@ -631,8 +639,7 @@ static bool hasSourceMods(const SDNode *N) {
     }
   }
   case ISD::SELECT:
-    // TODO: Only applies if select will be vector
-    return N->getValueType(0) == MVT::f32;
+    return selectSupportsSourceMods(N);
   default:
     return true;
   }
@@ -3758,7 +3765,8 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
 
   // TODO: Support vector constants.
   ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
-  if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
+  if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS &&
+      !selectSupportsSourceMods(N.getNode())) {
     SDLoc SL(N);
     // If one side is an fneg/fabs and the other is a constant, we can push the
     // fneg/fabs down. If it's an fabs, the constant needs to be non-negative.

diff  --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
index 3e2d5f6ab6801..e76f6024cc03f 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -205,8 +205,8 @@ define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
-; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) undef
   %y = load volatile float, ptr addrspace(1) undef
@@ -225,8 +225,8 @@ define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
 
 ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
 ; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
-; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) undef
   %y = load volatile float, ptr addrspace(1) undef
@@ -677,7 +677,7 @@ define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) undef
@@ -696,8 +696,8 @@ define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
 ; GCN: s_cmp_lg_u32
-; GCN: s_cselect_b64 s[0:1], -1, 0
-; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s[0:1]
+; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, [[VCC]]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) undef
@@ -953,8 +953,9 @@ define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0, [[X]], vcc
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
+; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) undef
   %y = load volatile float, ptr addrspace(1) undef
@@ -971,10 +972,11 @@ define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
 ; GCN: buffer_load_dword [[X:v[0-9]+]]
 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
 
+; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
 ; GCN: s_cmp_lg_u32
-; GCN: s_cselect_b64 vcc, -1, 0
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0, [[X]], vcc
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
+; GCN: s_cselect_b64 s[0:1], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s[0:1]
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
 define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
   %x = load volatile float, ptr addrspace(1) undef
   %y = load volatile float, ptr addrspace(1) undef