[llvm] c5c58fd - AMDGPU: Remove intermediate DAG node for trig_preop intrinsic

Tue Jun 16 18:20:46 PDT 2020

Author: Matt Arsenault
Date: 2020-06-16T21:06:25-04:00
New Revision: c5c58fd6b5c5b000df0fa2b9abdcbc76635c0368

URL: https://github.com/llvm/llvm-project/commit/c5c58fd6b5c5b000df0fa2b9abdcbc76635c0368
DIFF: https://github.com/llvm/llvm-project/commit/c5c58fd6b5c5b000df0fa2b9abdcbc76635c0368.diff

LOG: AMDGPU: Remove intermediate DAG node for trig_preop intrinsic

We weren't doing anything with this, and keeping it would just add
more boilerplate for GlobalISel.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
    llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/VOP3Instructions.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index d49ad005ae86..033ca5a4acb5 100644

--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -234,6 +234,7 @@ def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
+// Look Up 2.0 / pi src0 with segment select src1[4:0]
 def int_amdgcn_trig_preop : Intrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 366c0dcd4c1e..d856649092b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4322,7 +4322,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(DIV_FMAS)
   NODE_NAME_CASE(DIV_FIXUP)
   NODE_NAME_CASE(FMAD_FTZ)
-  NODE_NAME_CASE(TRIG_PREOP)
   NODE_NAME_CASE(RCP)
   NODE_NAME_CASE(RSQ)
   NODE_NAME_CASE(RCP_LEGACY)
@@ -4738,7 +4737,6 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
   case AMDGPUISD::DIV_SCALE:
   case AMDGPUISD::DIV_FMAS:
   case AMDGPUISD::DIV_FIXUP:
-  case AMDGPUISD::TRIG_PREOP:
     // TODO: Refine on operands.
     return SNaN;
   case AMDGPUISD::SIN_HW:
@@ -4776,6 +4774,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
       // TODO: Need is known positive check.
       return false;
     }
+    case Intrinsic::amdgcn_trig_preop:
     case Intrinsic::amdgcn_fdot2:
       // TODO: Refine on operand
       return SNaN;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 410c7a7f6697..85f23c81db17 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -411,7 +411,6 @@ enum NodeType : unsigned {
   // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
   // treated as an illegal operation.
   FMAD_FTZ,
-  TRIG_PREOP, // 1 ULP max error for f64
 
   // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
   //            For f64, max error 2^29 ULP, handles denormals.

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 59f9866b93b6..894677ec68b6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -18,10 +18,6 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
 ]>;
 
-def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
-  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
->;
-
 def AMDGPULdExpOp : SDTypeProfile<1, 2,
   [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
 >;
@@ -239,9 +235,6 @@ def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
 
 def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
 
-// Look Up 2.0 / pi src0 with segment select src1[4:0]
-def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
-
 def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
                           SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
                           [SDNPHasChain, SDNPMayLoad]>;

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a78844684638..2fc55fd5b280 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6422,9 +6422,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
 
-  case Intrinsic::amdgcn_trig_preop:
-    return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
-                       Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::amdgcn_div_scale: {
     const ConstantSDNode *Param = cast<ConstantSDNode>(Op.getOperand(3));
 
@@ -9241,7 +9238,6 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
   case AMDGPUISD::RSQ_CLAMP:
   case AMDGPUISD::RCP_LEGACY:
   case AMDGPUISD::RCP_IFLAG:
-  case AMDGPUISD::TRIG_PREOP:
   case AMDGPUISD::DIV_SCALE:
   case AMDGPUISD::DIV_FMAS:
   case AMDGPUISD::DIV_FIXUP:
@@ -9349,6 +9345,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
     case Intrinsic::amdgcn_rsq_clamp:
     case Intrinsic::amdgcn_rcp_legacy:
     case Intrinsic::amdgcn_rsq_legacy:
+    case Intrinsic::amdgcn_trig_preop:
       return true;
     default:
       break;

diff  --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 3907ce86ef56..169949f2171a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -403,7 +403,7 @@ let Constraints = "@earlyclobber $vdst" in {
 def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
 } // End Constraints = "@earlyclobber $vdst"
 
-def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
+def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop> {
   let SchedRW = [WriteDouble];
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
new file mode 100644
index 000000000000..6e6b55f38733
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+define double @v_trig_preop_f64(double %a, i32 %b) {
+; GCN-LABEL: v_trig_preop_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], v2
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
+  ret double %result
+}
+
+define double @v_trig_preop_f64_imm(double %a, i32 %b) {
+; GCN-LABEL: v_trig_preop_f64_imm:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_trig_preop_f64 v[0:1], v[0:1], 7
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
+  ret double %result
+}
+
+define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
+; CI-LABEL: s_trig_preop_f64:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v0, s2
+; CI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
+; CI-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_trig_preop_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x8
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: s_trig_preop_f64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], v0
+; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX9-NEXT:    s_endpgm
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
+  store volatile double %result, double* undef
+  ret void
+}
+
+define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
+; GCN-LABEL: s_trig_preop_f64_imm:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_trig_preop_f64 v[0:1], s[0:1], 7
+; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GCN-NEXT:    s_endpgm
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
+  store volatile double %result, double* undef
+  ret void
+}
+
+declare double @llvm.amdgcn.trig.preop.f64(double, i32) #0
+
+attributes #0 = { nounwind readnone speculatable }