[llvm] 592de00 - AMDGPU/GlobalISel: Select llvm.amdgcn.update.dpp
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 17 17:10:02 PST 2020
Author: Matt Arsenault
Date: 2020-01-17T20:09:53-05:00
New Revision: 592de0009f6482fbf033ef87d147aab13fcce78c
URL: https://github.com/llvm/llvm-project/commit/592de0009f6482fbf033ef87d147aab13fcce78c
DIFF: https://github.com/llvm/llvm-project/commit/592de0009f6482fbf033ef87d147aab13fcce78c.diff
LOG: AMDGPU/GlobalISel: Select llvm.amdgcn.update.dpp
The existing test is overly reliant on -mattr=-flat-for-global, and
some missing optimizations to re-use.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/VOP1Instructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index d420aa02ac28..9d3f87409bd9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -206,12 +206,15 @@ foreach Ty = [i64, p0, p1, p4] in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>;
}
-def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
+def gi_as_i32timm : GICustomOperandRenderer<"renderTruncTImm32">,
GISDNodeXFormEquiv<as_i32timm>;
-def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">,
+def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm16">,
GISDNodeXFormEquiv<as_i16timm>;
+def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm1">,
+ GISDNodeXFormEquiv<as_i1timm>;
+
def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
GISDNodeXFormEquiv<NegateImm>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index a10c1ce20037..09e4fc8d05ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2411,9 +2411,21 @@ void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
/// This only really exists to satisfy DAG type checking machinery, so is a
/// no-op here.
-void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
+void AMDGPUInstructionSelector::renderTruncTImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ MIB.addImm(MI.getOperand(OpIdx).getImm());
+}
+
+void AMDGPUInstructionSelector::renderTruncTImm16(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ MIB.addImm(MI.getOperand(OpIdx).getImm());
+}
+
+void AMDGPUInstructionSelector::renderTruncTImm1(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
MIB.addImm(MI.getOperand(OpIdx).getImm());
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 89d5595016d0..e6d978a0e01b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -177,8 +177,12 @@ class AMDGPUInstructionSelector : public InstructionSelector {
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
- void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
+ void renderTruncTImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderTruncTImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderTruncTImm1(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 02004c2739a3..09705cef1957 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -669,6 +669,10 @@ def as_i1imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;
+def as_i1timm : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
+}]>;
+
def as_i8imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7660cbd0962d..a5d8df9ea807 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1907,9 +1907,9 @@ def : GCNPat <
def : GCNPat <
(i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)),
- (V_MOV_B64_DPP_PSEUDO $old, $src, (as_i32imm $dpp_ctrl),
- (as_i32imm $row_mask), (as_i32imm $bank_mask),
- (as_i1imm $bound_ctrl))
+ (V_MOV_B64_DPP_PSEUDO VReg_64:$old, VReg_64:$src, (as_i32timm $dpp_ctrl),
+ (as_i32timm $row_mask), (as_i32timm $bank_mask),
+ (as_i1timm $bound_ctrl))
>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index c7aed0985540..09fd1fcf6e96 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -843,11 +843,12 @@ def : GCNPat <
>;
def : GCNPat <
- (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
- timm:$bank_mask, timm:$bound_ctrl)),
- (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
- (as_i32imm $row_mask), (as_i32imm $bank_mask),
- (as_i1imm $bound_ctrl))
+ (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
+ timm:$row_mask, timm:$bank_mask,
+ timm:$bound_ctrl)),
+ (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (as_i32timm $row_mask), (as_i32timm $bank_mask),
+ (as_i1timm $bound_ctrl))
>;
} // End OtherPredicates = [isGFX8Plus]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
new file mode 100644
index 000000000000..a5f06d2f5d23
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+; FIXME: Merge with DAG test
+
+define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+; GFX8-LABEL: dpp_test:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-NEXT: v_mov_b32_e32 v3, s1
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: s_nop 0
+; GFX8-NEXT: v_mov_b32_dpp v2, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX8-NEXT: flat_store_dword v[0:1], v2
+; GFX8-NEXT: s_endpgm
+;
+; GFX10-LABEL: dpp_test:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_e32 v2, s0
+; GFX10-NEXT: v_mov_b32_e32 v3, s1
+; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: v_mov_b32_dpp v2, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false)
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i64 %in2) {
+; GFX8-LABEL: update_dpp64_test:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX8-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX8-NEXT: v_mul_lo_u32 v1, 8, v1
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX8-NEXT: v_mul_hi_u32 v3, 8, v0
+; GFX8-NEXT: v_mul_lo_u32 v0, 8, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v4, s1
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
+; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: v_mov_b32_e32 v5, s3
+; GFX8-NEXT: v_mov_b32_e32 v4, s2
+; GFX8-NEXT: s_nop 0
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX8-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
+; GFX8-NEXT: s_endpgm
+;
+; GFX10-LABEL: update_dpp64_test:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX10-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_mul_hi_u32 v3, 8, v0
+; GFX10-NEXT: v_mul_lo_u32 v0, 8, v0
+; GFX10-NEXT: v_mul_lo_u32 v1, 8, v1
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32_e64 v6, vcc_lo, s0, v0
+; GFX10-NEXT: v_mov_b32_e32 v5, s3
+; GFX10-NEXT: v_mov_b32_e32 v4, s2
+; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
+; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, s1, v1, vcc_lo
+; GFX10-NEXT: global_load_dwordx2 v[2:3], v[6:7], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX10-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX10-NEXT: global_store_dwordx2 v[6:7], v[4:5], off
+; GFX10-NEXT: s_endpgm
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id
+ %load = load i64, i64 addrspace(1)* %gep
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 false) #1
+ store i64 %tmp0, i64 addrspace(1)* %gep
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #1
+declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #1
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { convergent nounwind readnone }
More information about the llvm-commits
mailing list