[llvm] 4b1702e - AMDGPU: Fix counting source modifiers as literal constants
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 7 15:40:22 PDT 2023
Author: Matt Arsenault
Date: 2023-08-07T18:40:16-04:00
New Revision: 4b1702e87a2687569b197aea4721353f8b788182
URL: https://github.com/llvm/llvm-project/commit/4b1702e87a2687569b197aea4721353f8b788182
DIFF: https://github.com/llvm/llvm-project/commit/4b1702e87a2687569b197aea4721353f8b788182.diff
LOG: AMDGPU: Fix counting source modifiers as literal constants
This fixes overestimating the code size. The regression was introduced by
79f52af4cd9a76485dd50bcdbb5d393eb7a70103.
https://reviews.llvm.org/D157103
Added:
llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/idiv-licm.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 281836c009330e..8e875f87021065 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3832,9 +3832,7 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint8_t OperandType) const {
assert(!MO.isReg() && "isInlineConstant called on register operand!");
- if (!MO.isImm() ||
- OperandType < AMDGPU::OPERAND_SRC_FIRST ||
- OperandType > AMDGPU::OPERAND_SRC_LAST)
+ if (!MO.isImm())
return false;
// MachineOperand provides no way to tell the true operand size, since it only
@@ -3908,9 +3906,23 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
}
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
- return false;
+ case AMDGPU::OPERAND_INPUT_MODS:
+ case MCOI::OPERAND_IMMEDIATE:
+ // Always embedded in the instruction for free.
+ return true;
+ case MCOI::OPERAND_UNKNOWN:
+ case MCOI::OPERAND_REGISTER:
+ case MCOI::OPERAND_PCREL:
+ case MCOI::OPERAND_GENERIC_0:
+ case MCOI::OPERAND_GENERIC_1:
+ case MCOI::OPERAND_GENERIC_2:
+ case MCOI::OPERAND_GENERIC_3:
+ case MCOI::OPERAND_GENERIC_4:
+ case MCOI::OPERAND_GENERIC_5:
+ // Just ignore anything else.
+ return true;
default:
- llvm_unreachable("invalid bitwidth");
+ llvm_unreachable("invalid operand type");
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
new file mode 100644
index 00000000000000..c37edcbcd292c9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
@@ -0,0 +1,312 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
+
+declare float @llvm.fabs.f32(float)
+declare float @llvm.fma.f32(float, float, float)
+
+define float @v_mul_f32_vop2(float %x, float %y) {
+; GFX9-LABEL: v_mul_f32_vop2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x0a]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_mul_f32_vop2:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_mul_f32_vop2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %mul = fmul float %x, %y
+ ret float %mul
+}
+; CHECK: codeLenInByte = 12
+
+define float @v_mul_f32_vop2_inline_imm(float %x) {
+; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x0a]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_mul_f32_vop2_inline_imm:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_mul_f32_vop2_inline_imm:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %mul = fmul float %x, 4.0
+ ret float %mul
+}
+; CHECK: codeLenInByte = 12
+
+define float @v_mul_f32_vop2_literal(float %x) {
+; GFX9-LABEL: v_mul_f32_vop2_literal:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x0a,0x00,0x00,0xf6,0x42]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_mul_f32_vop2_literal:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_mul_f32_vop2_literal:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %mul = fmul float %x, 123.0
+ ret float %mul
+}
+; CHECK: codeLenInByte = 16
+
+define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
+; GFX9-LABEL: v_mul_f32_vop3_src_mods:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x03,0x02,0x00]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_mul_f32_vop3_src_mods:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_mul_f32_vop3_src_mods:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %mul = fmul float %fabs.x, %y
+ ret float %mul
+}
+; CHECK: codeLenInByte = 16
+
+define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
+; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0xed,0x01,0x00]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %mul = fmul float %fabs.x, 4.0
+ ret float %mul
+}
+
+; CHECK: codeLenInByte = 16
+
+define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
+; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: s_mov_b32 s4, 0x42f60000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0xf6,0x42]
+; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, s4 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x09,0x00,0x00]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_mul_f32_vop3_src_mods_literal:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_mul_f32_vop3_src_mods_literal:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %mul = fmul float %fabs.x, 123.0
+ ret float %mul
+}
+
+; GFX9: codeLenInByte = 24
+; GFX10: codeLenInByte = 20
+
+define float @v_mul_f32_vop2_frame_index(float %x) {
+; GFX9-LABEL: v_mul_f32_vop2_frame_index:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s32 ; encoding: [0x01,0x00,0x10,0xd1,0x86,0x40,0x00,0x00]
+; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x0a]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_mul_f32_vop2_frame_index:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; encoding: [0x01,0x00,0x16,0xd5,0x85,0x40,0x00,0x00]
+; GFX10-NEXT: v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x10]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_mul_f32_vop2_frame_index:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %alloca = alloca i32, addrspace(5)
+ %ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
+ %cast = bitcast i32 %ptrtoint to float
+ %mul = fmul float %x, %cast
+ ret float %mul
+}
+
+; GFX9: codeLenInByte = 20
+; GFX10: codeLenInByte = 20
+; GFX11: codeLenInByte = 12
+
+define float @v_fma_f32(float %x, float %y, float %z) {
+; GFX9-LABEL: v_fma_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x0a,0x04]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_fma_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x4b,0xd5,0x00,0x03,0x0a,0x04]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_fma_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
+ ret float %fma
+}
+
+; CHECK: codeLenInByte = 16
+
+define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
+; GFX9-LABEL: v_fma_f32_src_mods:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x0a,0x04]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_fma_f32_src_mods:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0x0a,0x04]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_fma_f32_src_mods:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
+ ret float %fma
+}
+
+; CHECK: codeLenInByte = 16
+
+define float @v_fmac_f32(float %x, float %y) {
+; GFX9-LABEL: v_fmac_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: v_fma_f32 v0, v0, v1, v0 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x02,0x04]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_fmac_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_fmac_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %fma = call float @llvm.fma.f32(float %x, float %y, float %x)
+ ret float %fma
+}
+
+; GFX9: codeLenInByte = 16
+; GFX10: codeLenInByte = 12
+; GFX11: codeLenInByte = 12
+
+define float @v_fmaak_f32(float %x, float %y) {
+; GFX9-LABEL: v_fmaak_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43]
+; GFX9-NEXT: v_fma_f32 v0, v0, v1, s4 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x12,0x00]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_fmaak_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_fmaak_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
+ ret float %fma
+}
+
+; GFX9: codeLenInByte = 24
+; GFX10: codeLenInByte = 16
+; GFX11: codeLenInByte = 16
+
+define float @v_fma_k_f32_src_mods(float %x, float %y) {
+; GFX9-LABEL: v_fma_k_f32_src_mods:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX9-NEXT: s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43]
+; GFX9-NEXT: v_fma_f32 v0, |v0|, v1, s4 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x12,0x00]
+; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; GFX10-LABEL: v_fma_k_f32_src_mods:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
+; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; GFX11-LABEL: v_fma_k_f32_src_mods:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
+; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
+; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0)
+ ret float %fma
+}
+
+; GFX9: codeLenInByte = 24
+; GFX10: codeLenInByte = 20
+; GFX11: codeLenInByte = 20
diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
index 97607c5e72d0c3..64060ebbb159ec 100644
--- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
+++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll
@@ -848,7 +848,6 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s1
; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v2
-; GFX11-NEXT: s_set_inst_prefetch_distance 0x1
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB5_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -875,7 +874,6 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: global_store_b16 v[5:6], v0, off
; GFX11-NEXT: s_cbranch_vccz .LBB5_1
; GFX11-NEXT: ; %bb.2: ; %bb2
-; GFX11-NEXT: s_set_inst_prefetch_distance 0x2
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
@@ -1135,7 +1133,6 @@ define amdgpu_kernel void @srem16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s4
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
-; GFX11-NEXT: s_set_inst_prefetch_distance 0x1
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB7_1: ; %bb3
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -1170,7 +1167,6 @@ define amdgpu_kernel void @srem16_invariant_denom(ptr addrspace(1) nocapture %ar
; GFX11-NEXT: global_store_b16 v2, v3, s[6:7]
; GFX11-NEXT: s_cbranch_vccz .LBB7_1
; GFX11-NEXT: ; %bb.2: ; %bb2
-; GFX11-NEXT: s_set_inst_prefetch_distance 0x2
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
More information about the llvm-commits mailing list