[clang-tools-extra] [clang] [llvm] [AMDGPU] Reapply 'Sign extend simm16 in setreg intrinsic' (PR #78492)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 17 11:31:56 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
We currently force users to use a negative contant in the intrinsic call. Changing it zext would break existing programs, so just sign extend an argument.
---
Full diff: https://github.com/llvm/llvm-project/pull/78492.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+3-6)
- (modified) llvm/lib/Target/AMDGPU/SIModeRegister.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll (+66)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1bd1d425573352..07572aa12e2c38 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -786,12 +786,9 @@ class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;
-def SIMM16bit : ImmLeaf <i32,
- [{return isInt<16>(Imm);}]
->;
-
-def UIMM16bit : ImmLeaf <i32,
- [{return isUInt<16>(Imm);}]
+def SIMM16bit : TImmLeaf <i32,
+ [{return isInt<16>(Imm) || isUInt<16>(Imm);}],
+ as_i16timm
>;
def i64imm_32bit : ImmLeaf<i64, [{
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index be395d53c34e99..e62ad026dc5c87 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -285,7 +285,7 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
1;
unsigned Offset =
(Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
- unsigned Mask = ((1 << Width) - 1) << Offset;
+ unsigned Mask = maskTrailingOnes<unsigned>(Width) << Offset;
// If an InsertionPoint is set we will insert a setreg there.
if (InsertionPoint) {
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index b78d900c9bbf42..d914c3d9032f5f 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1124,7 +1124,7 @@ class S_SETREG_B32_Pseudo <list<dag> pattern=[]> : SOPK_Pseudo <
pattern>;
def S_SETREG_B32 : S_SETREG_B32_Pseudo <
- [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+ [(int_amdgcn_s_setreg (i32 SIMM16bit:$simm16), i32:$sdst)]> {
// Use custom inserter to optimize some cases to
// S_DENORM_MODE/S_ROUND_MODE/S_SETREG_B32_mode.
let usesCustomInserter = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
index a2b7e9de70e82c..05186ac2aa28f6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -1433,6 +1433,72 @@ define amdgpu_kernel void @test_setreg_set_4_bits_straddles_round_and_denorm() {
ret void
}
+define amdgpu_ps void @test_63489(i32 inreg %var.mode) {
+; GFX6-LABEL: test_63489:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX6-NEXT: ;;#ASMSTART
+; GFX6-NEXT: ;;#ASMEND
+; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX789-LABEL: test_63489:
+; GFX789: ; %bb.0:
+; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX789-NEXT: ;;#ASMSTART
+; GFX789-NEXT: ;;#ASMEND
+; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: test_63489:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX11-LABEL: test_63489:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
+ call void @llvm.amdgcn.s.setreg(i32 63489, i32 %var.mode)
+ call void asm sideeffect "", ""()
+ ret void
+}
+
+define amdgpu_ps void @test_minus_2047(i32 inreg %var.mode) {
+; GFX6-LABEL: test_minus_2047:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX6-NEXT: ;;#ASMSTART
+; GFX6-NEXT: ;;#ASMEND
+; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX789-LABEL: test_minus_2047:
+; GFX789: ; %bb.0:
+; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX789-NEXT: ;;#ASMSTART
+; GFX789-NEXT: ;;#ASMEND
+; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: test_minus_2047:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX11-LABEL: test_minus_2047:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
+ call void @llvm.amdgcn.s.setreg(i32 -2047, i32 %var.mode)
+ call void asm sideeffect "", ""()
+ ret void
+}
+
; FIXME: Broken for DAG
; define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
; call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
``````````
</details>
https://github.com/llvm/llvm-project/pull/78492
More information about the cfe-commits
mailing list