[llvm] [AMDGPU] Sign extend simm16 in setreg intrinsic (PR #77997)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 12 15:12:25 PST 2024
https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/77997
We currently force users to use a negative contant in the intrinsic call. Changing it zext would break existing programs, so just sign extend an argument.
>From 01af6c9d8e80b810bbdec35dee38b1cf5d73cfe0 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 12 Jan 2024 15:07:53 -0800
Subject: [PATCH] [AMDGPU] Sign extend simm16 in setreg intrinsic
We currently force users to use a negative contant in the
intrinsic call. Changing it zext would break existing programs,
so just sign extend an argument.
---
llvm/lib/Target/AMDGPU/SOPInstructions.td | 11 ++--
.../CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll | 66 +++++++++++++++++++
2 files changed, 72 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 46fa3d57a21cb2..5b35d4dcac2e4f 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1117,14 +1117,12 @@ def S_GETREG_B32 : SOPK_Pseudo <
let Defs = [MODE], Uses = [MODE] in {
// FIXME: Need to truncate immediate to 16-bits.
-class S_SETREG_B32_Pseudo <list<dag> pattern=[]> : SOPK_Pseudo <
+class S_SETREG_B32_Pseudo : SOPK_Pseudo <
"s_setreg_b32",
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
- "$simm16, $sdst",
- pattern>;
+ "$simm16, $sdst">;
-def S_SETREG_B32 : S_SETREG_B32_Pseudo <
- [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+def S_SETREG_B32 : S_SETREG_B32_Pseudo {
// Use custom inserter to optimize some cases to
// S_DENORM_MODE/S_ROUND_MODE/S_SETREG_B32_mode.
let usesCustomInserter = 1;
@@ -1160,6 +1158,9 @@ def S_SETREG_IMM32_B32_mode : S_SETREG_IMM32_B32_Pseudo {
} // End Defs = [MODE], Uses = [MODE]
+def : GCNPat<(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst),
+ (S_SETREG_B32 $sdst, (as_i16timm $simm16))>;
+
class SOPK_WAITCNT<string opName, list<dag> pat=[]> :
SOPK_Pseudo<
opName,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
index d2c14f2401fc35..99d80b5dd14b33 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -1433,6 +1433,72 @@ define amdgpu_kernel void @test_setreg_set_4_bits_straddles_round_and_denorm() {
ret void
}
+define amdgpu_ps void @test_63489(i32 inreg %var.mode) {
+; GFX6-LABEL: test_63489:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX6-NEXT: ;;#ASMSTART
+; GFX6-NEXT: ;;#ASMEND
+; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX789-LABEL: test_63489:
+; GFX789: ; %bb.0:
+; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX789-NEXT: ;;#ASMSTART
+; GFX789-NEXT: ;;#ASMEND
+; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: test_63489:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX11-LABEL: test_63489:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
+ call void @llvm.amdgcn.s.setreg(i32 63489, i32 %var.mode)
+ call void asm sideeffect "", ""()
+ ret void
+}
+
+define amdgpu_ps void @test_minus_2047(i32 inreg %var.mode) {
+; GFX6-LABEL: test_minus_2047:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX6-NEXT: ;;#ASMSTART
+; GFX6-NEXT: ;;#ASMEND
+; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX789-LABEL: test_minus_2047:
+; GFX789: ; %bb.0:
+; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX789-NEXT: ;;#ASMSTART
+; GFX789-NEXT: ;;#ASMEND
+; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: test_minus_2047:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX11-LABEL: test_minus_2047:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
+ call void @llvm.amdgcn.s.setreg(i32 -2047, i32 %var.mode)
+ call void asm sideeffect "", ""()
+ ret void
+}
+
; FIXME: Broken for DAG
; define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
; call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
More information about the llvm-commits
mailing list