[llvm] r373296 - AMDGPU/GlobalISel: Add support for init.exec intrinsics
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 19:07:25 PDT 2019
Author: arsenm
Date: Mon Sep 30 19:07:25 2019
New Revision: 373296
URL: http://llvm.org/viewvc/llvm-project?rev=373296&view=rev
Log:
AMDGPU/GlobalISel: Add support for init.exec intrinsics
The existing wave32 behavior seems broken and incomplete, but this
reproduces it.
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
llvm/trunk/test/CodeGen/AMDGPU/wave32.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td Mon Sep 30 19:07:25 2019
@@ -202,3 +202,6 @@ def : GISelVop2Pat <or, V_OR_B32_e32, i3
foreach Ty = [i64, p0, p1, p4] in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>;
}
+
+def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
+ GISDNodeXFormEquiv<as_i32timm>;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Sep 30 19:07:25 2019
@@ -2094,3 +2094,12 @@ AMDGPUInstructionSelector::selectDS1Addr
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
}};
}
+
+void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
+ assert(CstVal && "Expected constant value");
+ MIB.addImm(CstVal.getValue());
+}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h Mon Sep 30 19:07:25 2019
@@ -163,6 +163,9 @@ private:
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
+ void renderTruncImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Mon Sep 30 19:07:25 2019
@@ -2605,7 +2605,8 @@ AMDGPURegisterBankInfo::getInstrMapping(
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
- case Intrinsic::amdgcn_end_cf: {
+ case Intrinsic::amdgcn_end_cf:
+ case Intrinsic::amdgcn_init_exec: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
@@ -2658,6 +2659,12 @@ AMDGPURegisterBankInfo::getInstrMapping(
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_init_exec_from_input: {
+ unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
default:
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Sep 30 19:07:25 2019
@@ -689,6 +689,10 @@ def as_i32imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
+def as_i32timm: SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
def as_i64imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;
@@ -728,6 +732,10 @@ def UIMM16bit : ImmLeaf <i32,
[{return isUInt<16>(Imm);}]
>;
+def i64imm_32bit : ImmLeaf<i64, [{
+ return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
+}]>;
+
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return isInlineImmediate(N);
}]>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Mon Sep 30 19:07:25 2019
@@ -349,7 +349,8 @@ def SI_INIT_M0 : SPseudoInstSI <(outs),
}
def SI_INIT_EXEC : SPseudoInstSI <
- (outs), (ins i64imm:$src), []> {
+ (outs), (ins i64imm:$src),
+ [(int_amdgcn_init_exec (i64 timm:$src))]> {
let Defs = [EXEC];
let usesCustomInserter = 1;
let isAsCheapAsAMove = 1;
@@ -365,12 +366,20 @@ def SI_INIT_EXEC_LO : SPseudoInstSI <
let WaveSizePredicate = isWave32;
}
+// FIXME: Wave32 version
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
- (outs), (ins SSrc_b32:$input, i32imm:$shift), []> {
+ (outs), (ins SSrc_b32:$input, i32imm:$shift),
+ [(int_amdgcn_init_exec_from_input i32:$input, (i32 timm:$shift))]> {
let Defs = [EXEC];
let usesCustomInserter = 1;
}
+def : GCNPat <
+ (int_amdgcn_init_exec timm:$src),
+ (SI_INIT_EXEC_LO (as_i32imm imm:$src))> {
+ let WaveSizePredicate = isWave32;
+}
+
// Return for returning shaders to a shader variant epilog.
def SI_RETURN_TO_EPILOG : SPseudoInstSI <
(outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
@@ -609,23 +618,6 @@ def : GCNPat <
(SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))
>;
-def : GCNPat <
- (int_amdgcn_init_exec i64:$src),
- (SI_INIT_EXEC (as_i64imm $src))> {
- let WaveSizePredicate = isWave64;
-}
-
-def : GCNPat <
- (int_amdgcn_init_exec i64:$src),
- (SI_INIT_EXEC_LO (as_i32imm $src))> {
- let WaveSizePredicate = isWave32;
-}
-
-def : GCNPat <
- (int_amdgcn_init_exec_from_input i32:$input, i32:$shift),
- (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift))
->;
-
def : GCNPat<
(AMDGPUtrap timm:$trapid),
(S_TRAP $trapid)
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll?rev=373296&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll Mon Sep 30 19:07:25 2019
@@ -0,0 +1,2 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll?rev=373296&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll Mon Sep 30 19:07:25 2019
@@ -0,0 +1,2 @@
+; Runs original SDAG test with -global-isel
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032 %S/../llvm.amdgcn.init.exec.wave32.ll
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll Mon Sep 30 19:07:25 2019
@@ -1,4 +1,5 @@
-;RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}full_mask:
; GCN: s_mov_b64 exec, -1
@@ -51,7 +52,7 @@ main_body:
; GCN: s_bfm_b64 exec, s1, 0
; GCN: s_cmp_eq_u32 s1, 64
; GCN: s_cmov_b64 exec, -1
-; GCN: v_add_u32_e32 v0, s0, v0
+; GCN: v_add{{(_nc)?}}_u32_e32 v0, s0, v0
define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
main_body:
call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
@@ -65,7 +66,7 @@ main_body:
; GCN: s_bfm_b64 exec, s1, 0
; GCN: s_cmp_eq_u32 s1, 64
; GCN: s_cmov_b64 exec, -1
-; GCN: v_add_u32_e32 v0, s0, v0
+; GCN: v_add{{(_nc)?}}_u32_e32 v0, s0, v0
define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
main_body:
%s = add i32 %a, %count
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll?rev=373296&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll Mon Sep 30 19:07:25 2019
@@ -0,0 +1,31 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s
+
+; GCN-LABEL: {{^}}test_init_exec:
+; GFX1032: s_mov_b32 exec_lo, 0x12345
+; GFX1064: s_mov_b64 exec, 0x12345
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @test_init_exec(float %a, float %b) {
+main_body:
+ %s = fadd float %a, %b
+ call void @llvm.amdgcn.init.exec(i64 74565)
+ ret float %s
+}
+
+; GCN-LABEL: {{^}}test_init_exec_from_input:
+; GCN: s_bfe_u32 s0, s3, 0x70008
+; GFX1032: s_bfm_b32 exec_lo, s0, 0
+; GFX1032: s_cmp_eq_u32 s0, 32
+; GFX1032: s_cmov_b32 exec_lo, -1
+; GFX1064: s_bfm_b64 exec, s0, 0
+; GFX1064: s_cmp_eq_u32 s0, 64
+; GFX1064: s_cmov_b64 exec, -1
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @test_init_exec_from_input(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
+main_body:
+ %s = fadd float %a, %b
+ call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
+ ret float %s
+}
+
+declare void @llvm.amdgcn.init.exec(i64)
+declare void @llvm.amdgcn.init.exec.from.input(i32, i32)
Modified: llvm/trunk/test/CodeGen/AMDGPU/wave32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wave32.ll?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wave32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wave32.ll Mon Sep 30 19:07:25 2019
@@ -871,33 +871,6 @@ endif:
ret void
}
-; GCN-LABEL: {{^}}test_init_exec:
-; GFX1032: s_mov_b32 exec_lo, 0x12345
-; GFX1064: s_mov_b64 exec, 0x12345
-; GCN: v_add_f32_e32 v0,
-define amdgpu_ps float @test_init_exec(float %a, float %b) {
-main_body:
- %s = fadd float %a, %b
- call void @llvm.amdgcn.init.exec(i64 74565)
- ret float %s
-}
-
-; GCN-LABEL: {{^}}test_init_exec_from_input:
-; GCN: s_bfe_u32 s0, s3, 0x70008
-; GFX1032: s_bfm_b32 exec_lo, s0, 0
-; GFX1032: s_cmp_eq_u32 s0, 32
-; GFX1032: s_cmov_b32 exec_lo, -1
-; GFX1064: s_bfm_b64 exec, s0, 0
-; GFX1064: s_cmp_eq_u32 s0, 64
-; GFX1064: s_cmov_b64 exec, -1
-; GCN: v_add_f32_e32 v0,
-define amdgpu_ps float @test_init_exec_from_input(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
-main_body:
- %s = fadd float %a, %b
- call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
- ret float %s
-}
-
; GCN-LABEL: {{^}}test_vgprblocks_w32_attr:
; Test that the wave size can be overridden in function attributes and that the block size is correct as a result
; GFX10DEFWAVE: ; VGPRBlocks: 1
@@ -1132,8 +1105,6 @@ declare i32 @llvm.amdgcn.icmp.i32.i32(i3
declare void @llvm.amdgcn.kill(i1)
declare i1 @llvm.amdgcn.wqm.vote(i1)
declare i1 @llvm.amdgcn.ps.live()
-declare void @llvm.amdgcn.init.exec(i64)
-declare void @llvm.amdgcn.init.exec.from.input(i32, i32)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
More information about the llvm-commits
mailing list