[llvm] r375450 - AMDGPU: Use CopyToReg for interp intrinsic lowering
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 12:53:49 PDT 2019
Author: arsenm
Date: Mon Oct 21 12:53:49 2019
New Revision: 375450
URL: http://llvm.org/viewvc/llvm-project?rev=375450&view=rev
Log:
AMDGPU: Use CopyToReg for interp intrinsic lowering
This doesn't use the default value, so doesn't benefit from the hack
to help optimize it.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=375450&r1=375449&r2=375450&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Oct 21 12:53:49 2019
@@ -5877,34 +5877,35 @@ SDValue SITargetLowering::LowerINTRINSIC
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
case Intrinsic::amdgcn_interp_mov: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
- SDValue Glue = M0.getValue(1);
+ SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+ Op.getOperand(4), SDValue());
return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3), Glue);
+ Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
}
case Intrinsic::amdgcn_interp_p1: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
- SDValue Glue = M0.getValue(1);
+ SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+ Op.getOperand(4), SDValue());
return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3), Glue);
+ Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
}
case Intrinsic::amdgcn_interp_p2: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
- SDValue Glue = SDValue(M0.getNode(), 1);
+ SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+ Op.getOperand(5), SDValue());
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
- Glue);
+ ToM0.getValue(1));
}
case Intrinsic::amdgcn_interp_p1_f16: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
- SDValue Glue = M0.getValue(1);
+ SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+ Op.getOperand(5), SDValue());
+
if (getSubtarget()->getLDSBankCount() == 16) {
// 16 bank LDS
SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
DAG.getConstant(2, DL, MVT::i32), // P0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
- Glue);
+ ToM0.getValue(1));
SDValue Ops[] = {
Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan
@@ -5927,14 +5928,14 @@ SDValue SITargetLowering::LowerINTRINSIC
Op.getOperand(4), // high
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
DAG.getTargetConstant(0, DL, MVT::i32), // $omod
- Glue
+ ToM0.getValue(1)
};
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
}
}
case Intrinsic::amdgcn_interp_p2_f16: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6));
- SDValue Glue = SDValue(M0.getNode(), 1);
+ SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+ Op.getOperand(6), SDValue());
SDValue Ops[] = {
Op.getOperand(2), // Src0
Op.getOperand(3), // Attrchan
@@ -5944,7 +5945,7 @@ SDValue SITargetLowering::LowerINTRINSIC
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(5), // high
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
- Glue
+ ToM0.getValue(1)
};
return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll?rev=375450&r1=375449&r2=375450&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll Mon Oct 21 12:53:49 2019
@@ -6,8 +6,8 @@
define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX9-32BANK-LABEL: interp_f16:
; GFX9-32BANK: ; %bb.0: ; %main_body
-; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
+; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y
; GFX9-32BANK-NEXT: v_mov_b32_e32 v2, s1
@@ -20,8 +20,8 @@ define amdgpu_ps half @interp_f16(float
;
; GFX8-32BANK-LABEL: interp_f16:
; GFX8-32BANK: ; %bb.0: ; %main_body
-; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
+; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y
; GFX8-32BANK-NEXT: v_mov_b32_e32 v2, s1
@@ -119,8 +119,8 @@ main_body:
define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX9-32BANK-LABEL: interp_p2_m0_setup:
; GFX9-32BANK: ; %bb.0: ; %main_body
-; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
+; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
; GFX9-32BANK-NEXT: ;;#ASMSTART
@@ -136,8 +136,8 @@ define amdgpu_ps half @interp_p2_m0_setu
;
; GFX8-32BANK-LABEL: interp_p2_m0_setup:
; GFX8-32BANK: ; %bb.0: ; %main_body
-; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
+; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
; GFX8-32BANK-NEXT: ;;#ASMSTART
More information about the llvm-commits
mailing list