[llvm] 6085593 - [AMDGPU] simplifyI24 - replace GetDemandedBits with SimplifyMultipleUseDemandedBits

Thu Feb 20 04:03:24 PST 2020

Author: Simon Pilgrim
Date: 2020-02-20T12:03:08Z
New Revision: 6085593c128e91fd7db998c5441ebe120c7e4f04

URL: https://github.com/llvm/llvm-project/commit/6085593c128e91fd7db998c5441ebe120c7e4f04
DIFF: https://github.com/llvm/llvm-project/commit/6085593c128e91fd7db998c5441ebe120c7e4f04.diff

LOG: [AMDGPU] simplifyI24 - replace GetDemandedBits with SimplifyMultipleUseDemandedBits

GetDemandedBits mostly just calls SimplifyMultipleUseDemandedBits now, but it does a very blunt constant simplification that SimplifyMultipleUseDemandedBits avoids.

If we need to demand bits from constants we should handle this through ShrinkDemandedConstant/targetShrinkDemandedConstant.

@arsenm confirmed that the sign extended immediates are better for code size.

Differential Revision: https://reviews.llvm.org/D74857

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
    llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8cbe4485db4f..79a0d207d256 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2822,6 +2822,7 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
 static SDValue simplifyI24(SDNode *Node24,
                            TargetLowering::DAGCombinerInfo &DCI) {
   SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
 
   SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0);
@@ -2835,11 +2836,11 @@ static SDValue simplifyI24(SDNode *Node24,
 
   APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
 
-  // First try to simplify using GetDemandedBits which allows the operands to
-  // have other uses, but will only perform simplifications that involve
-  // bypassing some nodes for this user.
-  SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded);
-  SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded);
+  // First try to simplify using SimplifyMultipleUseDemandedBits which allows
+  // the operands to have other uses, but will only perform simplifications that
+  // involve bypassing some nodes for this user.
+  SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG);
+  SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG);
   if (DemandedLHS || DemandedRHS)
     return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(),
                        DemandedLHS ? DemandedLHS : LHS,
@@ -2847,7 +2848,6 @@ static SDValue simplifyI24(SDNode *Node24,
 
   // Now try SimplifyDemandedBits which can simplify the nodes used by our
   // operands if this node is the only user.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
     return SDValue(Node24, 0);
   if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
index 1f660df598e4..d37b4052acda 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-
 ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GCN %s
+
 define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(float addrspace(1)* %p) #4 {
 ; GCN-LABEL: test_mul24_knownbits_kernel:
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    v_and_b32_e32 v0, 3, v0
-; GCN-NEXT:    v_mul_i32_i24_e32 v0, 0xfffffb, v0
+; GCN-NEXT:    v_mul_i32_i24_e32 v0, -5, v0
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; GCN-NEXT:    v_and_b32_e32 v0, 0xffffffe0, v0
 ; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0

diff  --git a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
index a8f5603ef237..bd1b5c39037d 100644
--- a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
@@ -125,7 +125,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad
 ; GCN-NEXT:    v_mov_b32_e32 v4, v2
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_or_b32_e32 v0, 0x800000, v1
-; GCN-NEXT:    v_mul_i32_i24_e32 v0, 0xfffff9, v0
+; GCN-NEXT:    v_mul_i32_i24_e32 v0, -7, v0
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
 ; GCN-NEXT:    buffer_store_dwordx2 v[1:2], v[3:4], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm