[llvm] r373296 - AMDGPU/GlobalISel: Add support for init.exec intrinsics

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 30 19:07:25 PDT 2019


Author: arsenm
Date: Mon Sep 30 19:07:25 2019
New Revision: 373296

URL: http://llvm.org/viewvc/llvm-project?rev=373296&view=rev
Log:
AMDGPU/GlobalISel: Add support for init.exec intrinsics

The existing wave32 behavior seems broken and incomplete, but this
reproduces it.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
    llvm/trunk/test/CodeGen/AMDGPU/wave32.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td Mon Sep 30 19:07:25 2019
@@ -202,3 +202,6 @@ def : GISelVop2Pat <or, V_OR_B32_e32, i3
 foreach Ty = [i64, p0, p1, p4] in {
   defm : SMRD_Pattern <"S_LOAD_DWORDX2",  Ty>;
 }
+
+def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
+  GISDNodeXFormEquiv<as_i32timm>;

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Sep 30 19:07:25 2019
@@ -2094,3 +2094,12 @@ AMDGPUInstructionSelector::selectDS1Addr
       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
     }};
 }
+
+void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
+                                                 const MachineInstr &MI) const {
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
+  assert(CstVal && "Expected constant value");
+  MIB.addImm(CstVal.getValue());
+}

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h Mon Sep 30 19:07:25 2019
@@ -163,6 +163,9 @@ private:
   InstructionSelector::ComplexRendererFns
   selectDS1Addr1Offset(MachineOperand &Root) const;
 
+  void renderTruncImm32(MachineInstrBuilder &MIB,
+                        const MachineInstr &MI) const;
+
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Mon Sep 30 19:07:25 2019
@@ -2605,7 +2605,8 @@ AMDGPURegisterBankInfo::getInstrMapping(
       OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
       break;
     }
-    case Intrinsic::amdgcn_end_cf: {
+    case Intrinsic::amdgcn_end_cf:
+    case Intrinsic::amdgcn_init_exec: {
       unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
       OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
       break;
@@ -2658,6 +2659,12 @@ AMDGPURegisterBankInfo::getInstrMapping(
       OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
       break;
     }
+    case Intrinsic::amdgcn_init_exec_from_input: {
+      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      break;
+    }
     default:
       if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
               AMDGPU::lookupRsrcIntrinsic(IntrID)) {

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Sep 30 19:07:25 2019
@@ -689,6 +689,10 @@ def as_i32imm: SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
 }]>;
 
+def as_i32timm: SDNodeXForm<timm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
 def as_i64imm: SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
 }]>;
@@ -728,6 +732,10 @@ def UIMM16bit : ImmLeaf <i32,
   [{return isUInt<16>(Imm);}]
 >;
 
+def i64imm_32bit : ImmLeaf<i64, [{
+  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
+}]>;
+
 class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
   return isInlineImmediate(N);
 }]>;

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Mon Sep 30 19:07:25 2019
@@ -349,7 +349,8 @@ def SI_INIT_M0 : SPseudoInstSI <(outs),
 }
 
 def SI_INIT_EXEC : SPseudoInstSI <
-  (outs), (ins i64imm:$src), []> {
+  (outs), (ins i64imm:$src),
+  [(int_amdgcn_init_exec (i64 timm:$src))]> {
   let Defs = [EXEC];
   let usesCustomInserter = 1;
   let isAsCheapAsAMove = 1;
@@ -365,12 +366,20 @@ def SI_INIT_EXEC_LO : SPseudoInstSI <
   let WaveSizePredicate = isWave32;
 }
 
+// FIXME: Wave32 version
 def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
-  (outs), (ins SSrc_b32:$input, i32imm:$shift), []> {
+  (outs), (ins SSrc_b32:$input, i32imm:$shift),
+  [(int_amdgcn_init_exec_from_input i32:$input, (i32 timm:$shift))]> {
   let Defs = [EXEC];
   let usesCustomInserter = 1;
 }
 
+def : GCNPat <
+  (int_amdgcn_init_exec timm:$src),
+  (SI_INIT_EXEC_LO (as_i32imm imm:$src))> {
+  let WaveSizePredicate = isWave32;
+}
+
 // Return for returning shaders to a shader variant epilog.
 def SI_RETURN_TO_EPILOG : SPseudoInstSI <
   (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
@@ -609,23 +618,6 @@ def : GCNPat <
   (SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))
 >;
 
-def : GCNPat <
-  (int_amdgcn_init_exec i64:$src),
-  (SI_INIT_EXEC (as_i64imm $src))> {
-  let WaveSizePredicate = isWave64;
-}
-
-def : GCNPat <
-  (int_amdgcn_init_exec i64:$src),
-  (SI_INIT_EXEC_LO (as_i32imm $src))> {
-  let WaveSizePredicate = isWave32;
-}
-
-def : GCNPat <
-  (int_amdgcn_init_exec_from_input i32:$input, i32:$shift),
-  (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift))
->;
-
 def : GCNPat<
   (AMDGPUtrap timm:$trapid),
   (S_TRAP $trapid)

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll?rev=373296&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll Mon Sep 30 19:07:25 2019
@@ -0,0 +1,2 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll?rev=373296&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll Mon Sep 30 19:07:25 2019
@@ -0,0 +1,2 @@
+; Runs original SDAG test with -global-isel
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032  %S/../llvm.amdgcn.init.exec.wave32.ll

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll Mon Sep 30 19:07:25 2019
@@ -1,4 +1,5 @@
-;RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}full_mask:
 ; GCN: s_mov_b64 exec, -1
@@ -51,7 +52,7 @@ main_body:
 ; GCN: s_bfm_b64 exec, s1, 0
 ; GCN: s_cmp_eq_u32 s1, 64
 ; GCN: s_cmov_b64 exec, -1
-; GCN: v_add_u32_e32 v0, s0, v0
+; GCN: v_add{{(_nc)?}}_u32_e32 v0, s0, v0
 define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
 main_body:
   call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
@@ -65,7 +66,7 @@ main_body:
 ; GCN: s_bfm_b64 exec, s1, 0
 ; GCN: s_cmp_eq_u32 s1, 64
 ; GCN: s_cmov_b64 exec, -1
-; GCN: v_add_u32_e32 v0, s0, v0
+; GCN: v_add{{(_nc)?}}_u32_e32 v0, s0, v0
 define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
 main_body:
   %s = add i32 %a, %count

Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll?rev=373296&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll Mon Sep 30 19:07:25 2019
@@ -0,0 +1,31 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s
+
+; GCN-LABEL: {{^}}test_init_exec:
+; GFX1032: s_mov_b32 exec_lo, 0x12345
+; GFX1064: s_mov_b64 exec, 0x12345
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @test_init_exec(float %a, float %b) {
+main_body:
+  %s = fadd float %a, %b
+  call void @llvm.amdgcn.init.exec(i64 74565)
+  ret float %s
+}
+
+; GCN-LABEL: {{^}}test_init_exec_from_input:
+; GCN: s_bfe_u32 s0, s3, 0x70008
+; GFX1032: s_bfm_b32 exec_lo, s0, 0
+; GFX1032: s_cmp_eq_u32 s0, 32
+; GFX1032: s_cmov_b32 exec_lo, -1
+; GFX1064: s_bfm_b64 exec, s0, 0
+; GFX1064: s_cmp_eq_u32 s0, 64
+; GFX1064: s_cmov_b64 exec, -1
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @test_init_exec_from_input(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
+main_body:
+  %s = fadd float %a, %b
+  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
+  ret float %s
+}
+
+declare void @llvm.amdgcn.init.exec(i64)
+declare void @llvm.amdgcn.init.exec.from.input(i32, i32)

Modified: llvm/trunk/test/CodeGen/AMDGPU/wave32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wave32.ll?rev=373296&r1=373295&r2=373296&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wave32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wave32.ll Mon Sep 30 19:07:25 2019
@@ -871,33 +871,6 @@ endif:
   ret void
 }
 
-; GCN-LABEL: {{^}}test_init_exec:
-; GFX1032: s_mov_b32 exec_lo, 0x12345
-; GFX1064: s_mov_b64 exec, 0x12345
-; GCN: v_add_f32_e32 v0,
-define amdgpu_ps float @test_init_exec(float %a, float %b) {
-main_body:
-  %s = fadd float %a, %b
-  call void @llvm.amdgcn.init.exec(i64 74565)
-  ret float %s
-}
-
-; GCN-LABEL: {{^}}test_init_exec_from_input:
-; GCN: s_bfe_u32 s0, s3, 0x70008
-; GFX1032: s_bfm_b32 exec_lo, s0, 0
-; GFX1032: s_cmp_eq_u32 s0, 32
-; GFX1032: s_cmov_b32 exec_lo, -1
-; GFX1064: s_bfm_b64 exec, s0, 0
-; GFX1064: s_cmp_eq_u32 s0, 64
-; GFX1064: s_cmov_b64 exec, -1
-; GCN: v_add_f32_e32 v0,
-define amdgpu_ps float @test_init_exec_from_input(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
-main_body:
-  %s = fadd float %a, %b
-  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
-  ret float %s
-}
-
 ; GCN-LABEL: {{^}}test_vgprblocks_w32_attr:
 ; Test that the wave size can be overridden in function attributes and that the block size is correct as a result
 ; GFX10DEFWAVE: ; VGPRBlocks: 1
@@ -1132,8 +1105,6 @@ declare i32 @llvm.amdgcn.icmp.i32.i32(i3
 declare void @llvm.amdgcn.kill(i1)
 declare i1 @llvm.amdgcn.wqm.vote(i1)
 declare i1 @llvm.amdgcn.ps.live()
-declare void @llvm.amdgcn.init.exec(i64)
-declare void @llvm.amdgcn.init.exec.from.input(i32, i32)
 declare i64 @llvm.cttz.i64(i64, i1)
 declare i32 @llvm.cttz.i32(i32, i1)
 




More information about the llvm-commits mailing list