[llvm] 0892a96 - AMDGPU: Optimize s_setreg_b32 to s_denorm_mode/s_round_mode
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri May 29 18:18:55 PDT 2020
Author: Matt Arsenault
Date: 2020-05-29T21:11:36-04:00
New Revision: 0892a96a05a8943457a4a3e2547923087aa06226
URL: https://github.com/llvm/llvm-project/commit/0892a96a05a8943457a4a3e2547923087aa06226
DIFF: https://github.com/llvm/llvm-project/commit/0892a96a05a8943457a4a3e2547923087aa06226.diff
LOG: AMDGPU: Optimize s_setreg_b32 to s_denorm_mode/s_round_mode
This uses a custom inserter because that was less work than teaching
tablegen a way to indicate that it is sometimes OK to have an instruction
without side effects in the output of a side-effecting pattern.
The asm in the tests is needed to look like a read of the mode register,
which prevents the mode register write from being deleted. However, there
seems to be a bug where the mode register def instructions are moved across
the side-effecting asm by the post-RA scheduler.
Another oddity is that the immediate is formatted differently between
s_denorm_mode (printed in decimal) and s_round_mode (printed in hex).
Added:
Modified:
llvm/lib/Target/AMDGPU/SIDefines.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 23ef56afc39c..c8d1542f2a1a 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -366,6 +366,28 @@ enum Width : unsigned {
WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
};
+enum ModeRegisterMasks : uint32_t { // Bit masks for fields of the MODE hardware register value.
+ FP_ROUND_MASK = 0xf << 0, // Bits 0..3
+ FP_DENORM_MASK = 0xf << 4, // Bits 4..7
+ DX10_CLAMP_MASK = 1 << 8, // Bit 8
+ IEEE_MODE_MASK = 1 << 9, // Bit 9
+ LOD_CLAMP_MASK = 1 << 10, // Bit 10
+ DEBUG_MASK = 1 << 11, // Bit 11
+
+ // EXCP_EN fields: one single-bit mask each, covering bits 12..18.
+ EXCP_EN_INVALID_MASK = 1 << 12, // Bit 12
+ EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13, // Bit 13
+ EXCP_EN_FLOAT_DIV0_MASK = 1 << 14, // Bit 14
+ EXCP_EN_OVERFLOW_MASK = 1 << 15, // Bit 15
+ EXCP_EN_UNDERFLOW_MASK = 1 << 16, // Bit 16
+ EXCP_EN_INEXACT_MASK = 1 << 17, // Bit 17
+ EXCP_EN_INT_DIV0_MASK = 1 << 18, // Bit 18
+ // Bits 19..26 have no mask defined in this enum.
+ GPR_IDX_EN_MASK = 1 << 27, // Bit 27
+ VSKIP_MASK = 1 << 28, // Bit 28
+ CSP_MASK = 0x7u << 29 // Bits 29..31
+};
+
} // namespace Hwreg
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 619ce1abeb81..452ff785ec06 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4119,6 +4119,75 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
return emitGWSMemViolTestLoop(MI, BB);
+ case AMDGPU::S_SETREG_B32: {
+ if (!getSubtarget()->hasDenormModeInst())
+ return BB;
+
+ // Try to optimize cases that only set the denormal mode or rounding mode.
+ //
+ // If the s_setreg_b32 fully sets all of the bits in the rounding mode or
+ // denormal mode to a constant, we can use s_round_mode or s_denorm_mode
+ // instead.
+ //
+ // FIXME: This could be predicates on the immediate, but tablegen doesn't
+ // allow you to have a no side effect instruction in the output of a
+ // sideeffecting pattern.
+
+ // TODO: Should also emit a no side effects pseudo if only FP bits are
+ // touched, even if not all of them or to a variable.
+ unsigned ID, Offset, Width;
+ AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
+ if (ID != AMDGPU::Hwreg::ID_MODE)
+ return BB;
+
+ const unsigned WidthMask = maskTrailingOnes<unsigned>(Width);
+ const unsigned SetMask = WidthMask << Offset;
+ unsigned SetDenormOp = 0;
+ unsigned SetRoundOp = 0;
+
+ // The dedicated instructions can only set the whole denorm or round mode at
+ // once, not a subset of bits in either.
+ if (Width == 8 && (SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
+ AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask) {
+ // If this fully sets both the round and denorm mode, emit the two
+ // dedicated instructions for these.
+ assert(Offset == 0);
+ SetRoundOp = AMDGPU::S_ROUND_MODE;
+ SetDenormOp = AMDGPU::S_DENORM_MODE;
+ } else if (Width == 4) {
+ if ((SetMask & AMDGPU::Hwreg::FP_ROUND_MASK) == SetMask) {
+ SetRoundOp = AMDGPU::S_ROUND_MODE;
+ assert(Offset == 0);
+ } else if ((SetMask & AMDGPU::Hwreg::FP_DENORM_MASK) == SetMask) {
+ SetDenormOp = AMDGPU::S_DENORM_MODE;
+ assert(Offset == 4);
+ }
+ }
+
+ if (SetRoundOp || SetDenormOp) {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
+ if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
+ unsigned ImmVal = Def->getOperand(1).getImm();
+ if (SetRoundOp) {
+ BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
+ .addImm(ImmVal & 0xf);
+
+ // If we also have the denorm mode, get just the denorm mode bits.
+ ImmVal >>= 4;
+ }
+
+ if (SetDenormOp) {
+ BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
+ .addImm(ImmVal & 0xf);
+ }
+
+ MI.eraseFromParent();
+ }
+ }
+
+ return BB;
+ }
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index dbafea5a1347..774b9cf02785 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -808,6 +808,10 @@ def S_SETREG_B32 : SOPK_Pseudo <
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
"$simm16, $sdst",
[(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+
+ // Use custom inserter to optimize some cases to
+ // S_DENORM_MODE/S_ROUND_MODE.
+ let usesCustomInserter = 1;
let Defs = [MODE];
let Uses = [MODE];
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
index 72de32e5a5ff..531495c53b5c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
@@ -309,7 +309,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_0() {
; GFX10-LABEL: test_setreg_full_round_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -329,7 +329,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_1() {
; GFX10-LABEL: test_setreg_full_round_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
+; GFX10-NEXT: s_round_mode 0x1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -349,7 +349,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_2() {
; GFX10-LABEL: test_setreg_full_round_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
+; GFX10-NEXT: s_round_mode 0x2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -369,7 +369,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_4() {
; GFX10-LABEL: test_setreg_full_round_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
+; GFX10-NEXT: s_round_mode 0x4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -389,7 +389,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_8() {
; GFX10-LABEL: test_setreg_full_round_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
+; GFX10-NEXT: s_round_mode 0x8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -409,7 +409,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_15() {
; GFX10-LABEL: test_setreg_full_round_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
+; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -430,7 +430,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_42() {
; GFX10-LABEL: test_setreg_full_round_mode_42:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42
+; GFX10-NEXT: s_round_mode 0xa
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -450,7 +450,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_0() {
; GFX10-LABEL: test_setreg_full_denorm_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -470,7 +470,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_1() {
; GFX10-LABEL: test_setreg_full_denorm_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1
+; GFX10-NEXT: s_denorm_mode 1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -491,7 +491,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_2() {
; GFX10-LABEL: test_setreg_full_denorm_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2
+; GFX10-NEXT: s_denorm_mode 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -511,7 +511,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_4() {
; GFX10-LABEL: test_setreg_full_denorm_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4
+; GFX10-NEXT: s_denorm_mode 4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -531,7 +531,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_8() {
; GFX10-LABEL: test_setreg_full_denorm_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8
+; GFX10-NEXT: s_denorm_mode 8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -551,7 +551,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_15() {
; GFX10-LABEL: test_setreg_full_denorm_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15
+; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -571,7 +571,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_42() {
; GFX10-LABEL: test_setreg_full_denorm_mode_42:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42
+; GFX10-NEXT: s_denorm_mode 10
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -591,10 +591,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_0()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 0)
call void asm sideeffect "", ""()
@@ -611,10 +612,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_1()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x1
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 1)
call void asm sideeffect "", ""()
@@ -631,10 +633,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_2()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x2
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 2)
call void asm sideeffect "", ""()
@@ -651,10 +654,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_4()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x4
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 4)
call void asm sideeffect "", ""()
@@ -671,10 +675,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_8()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x8
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 8)
call void asm sideeffect "", ""()
@@ -691,10 +696,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_16()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 1
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 16)
call void asm sideeffect "", ""()
@@ -711,10 +717,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_32()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 2
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 32)
call void asm sideeffect "", ""()
@@ -731,10 +738,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_64()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 4
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 64)
call void asm sideeffect "", ""()
@@ -751,10 +759,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_128(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 8
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 128)
call void asm sideeffect "", ""()
@@ -771,10 +780,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_15()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 15)
call void asm sideeffect "", ""()
@@ -791,10 +801,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 255)
call void asm sideeffect "", ""()
@@ -812,10 +823,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_597(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x5
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 5
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 597)
call void asm sideeffect "", ""()
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
index 934e39e5987f..515b41d066c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -309,7 +309,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_0() {
; GFX10-LABEL: test_setreg_full_round_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -329,7 +329,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_1() {
; GFX10-LABEL: test_setreg_full_round_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
+; GFX10-NEXT: s_round_mode 0x1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -349,7 +349,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_2() {
; GFX10-LABEL: test_setreg_full_round_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
+; GFX10-NEXT: s_round_mode 0x2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -369,7 +369,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_4() {
; GFX10-LABEL: test_setreg_full_round_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
+; GFX10-NEXT: s_round_mode 0x4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -389,7 +389,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_8() {
; GFX10-LABEL: test_setreg_full_round_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
+; GFX10-NEXT: s_round_mode 0x8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -409,7 +409,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_15() {
; GFX10-LABEL: test_setreg_full_round_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
+; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -430,7 +430,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_42() {
; GFX10-LABEL: test_setreg_full_round_mode_42:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42
+; GFX10-NEXT: s_round_mode 0xa
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -450,7 +450,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_0() {
; GFX10-LABEL: test_setreg_full_denorm_mode_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -470,7 +470,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_1() {
; GFX10-LABEL: test_setreg_full_denorm_mode_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1
+; GFX10-NEXT: s_denorm_mode 1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -491,7 +491,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_2() {
; GFX10-LABEL: test_setreg_full_denorm_mode_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2
+; GFX10-NEXT: s_denorm_mode 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -511,7 +511,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_4() {
; GFX10-LABEL: test_setreg_full_denorm_mode_4:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4
+; GFX10-NEXT: s_denorm_mode 4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -531,7 +531,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_8() {
; GFX10-LABEL: test_setreg_full_denorm_mode_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8
+; GFX10-NEXT: s_denorm_mode 8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -551,7 +551,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_15() {
; GFX10-LABEL: test_setreg_full_denorm_mode_15:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15
+; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -571,7 +571,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_42() {
; GFX10-LABEL: test_setreg_full_denorm_mode_42:
; GFX10: ; %bb.0:
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42
+; GFX10-NEXT: s_denorm_mode 10
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
@@ -591,10 +591,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_0()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 0)
call void asm sideeffect "", ""()
@@ -611,10 +612,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_1()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x1
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 1)
call void asm sideeffect "", ""()
@@ -631,10 +633,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_2()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x2
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 2)
call void asm sideeffect "", ""()
@@ -651,10 +654,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_4()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x4
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 4)
call void asm sideeffect "", ""()
@@ -671,10 +675,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_8()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x8
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 8)
call void asm sideeffect "", ""()
@@ -691,10 +696,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_16()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 1
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 16)
call void asm sideeffect "", ""()
@@ -711,10 +717,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_32()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 2
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 32)
call void asm sideeffect "", ""()
@@ -731,10 +738,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_64()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 4
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 64)
call void asm sideeffect "", ""()
@@ -751,10 +759,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_128(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 8
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 128)
call void asm sideeffect "", ""()
@@ -771,10 +780,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_15()
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 0
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 15)
call void asm sideeffect "", ""()
@@ -791,10 +801,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0xf
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 255)
call void asm sideeffect "", ""()
@@ -812,10 +823,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_597(
;
; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597:
; GFX10: ; %bb.0:
+; GFX10-NEXT: s_round_mode 0x5
; GFX10-NEXT: ; implicit-def: $vcc_hi
-; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_denorm_mode 5
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.s.setreg(i32 14337, i32 597)
call void asm sideeffect "", ""()
More information about the llvm-commits
mailing list