[llvm-branch-commits] [llvm] AMDGPU: Fix overly conservative immediate operand check (PR #127563)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 17 20:27:30 PST 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127563
From 2f31f251066060c70674499b28be8ab4f438aee5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 17 Feb 2025 22:31:48 +0700
Subject: [PATCH] AMDGPU: Fix overly conservative immediate operand check
The real legality check is performed later anyway, so this was
unnecessarily blocking immediate folds in handled cases.

This also stops folding s_fmac_f32 to s_fmamk_f32 in a few tests,
but that seems better. The GlobalISel changes look suspicious;
GlobalISel may be mishandling constants for VOP3P instructions.
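
For reference, a minimal standalone C++ sketch of the relaxed filter (the
helper name canTryImmFold, the wrapper function itself, and the exact
includes are illustrative only, not part of this patch):

  #include "Utils/AMDGPUBaseInfo.h"   // AMDGPU::isSISrcOperand
  #include "llvm/MC/MCInstrDesc.h"    // llvm::MCInstrDesc

  using namespace llvm;

  // Mirrors the early filter in SIFoldOperandsImpl::tryToFoldACImm after
  // this change: reject only operand indices that are out of range or not
  // SI source operands at all (unhandled pseudos). Whether a particular
  // immediate is actually legal for the operand is left to the later
  // legality check, instead of requiring an inline-constant-capable
  // operand (isSISrcInlinableOperand) up front.
  static bool canTryImmFold(const MCInstrDesc &Desc, unsigned UseOpIdx) {
    if (UseOpIdx >= Desc.getNumOperands())
      return false;
    return AMDGPU::isSISrcOperand(Desc, UseOpIdx);
  }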
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 3 ++-
llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll | 16 ++++------------
llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll | 16 ++++------------
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll | 4 +---
llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll | 6 ++----
llvm/test/CodeGen/AMDGPU/constrained-shift.ll | 6 ++----
.../CodeGen/AMDGPU/fold-operands-scalar-fmac.mir | 4 ++--
llvm/test/CodeGen/AMDGPU/global-saddr-load.ll | 5 +----
llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 10 +++++-----
llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll | 4 ++--
10 files changed, 25 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 84773349e0ca0..cbd858b9002ee 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -830,7 +830,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
   if (UseOpIdx >= Desc.getNumOperands())
     return false;

-  if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx))
+  // Filter out unhandled pseudos.
+  if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
     return false;

   uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
index 4be00fedb972e..89078f20f1d47 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -920,9 +920,7 @@ define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, s4
-; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX6-NEXT: ; return to shader part epilog
;
@@ -962,9 +960,7 @@ define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inr
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, s4
-; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1004,9 +1000,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, s4
-; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1060,9 +1054,7 @@ define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
; GFX6-NEXT: s_or_b32 s5, s5, s6
-; GFX6-NEXT: s_mov_b32 s6, -1
-; GFX6-NEXT: s_mov_b32 s7, s6
-; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
+; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
; GFX6-NEXT: ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
index e7119c89ac06c..065fadf3b5ef3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
@@ -919,9 +919,7 @@ define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1)
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, s4
-; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX6-NEXT: ; return to shader part epilog
;
@@ -961,9 +959,7 @@ define amdgpu_ps i64 @s_orn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inre
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, s4
-; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1003,9 +999,7 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_use(<4 x i16> inreg %src0, <4
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, s4
-; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1059,9 +1053,7 @@ define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_foldable_use(<4 x i16> inreg %
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
; GFX6-NEXT: s_or_b32 s5, s5, s6
-; GFX6-NEXT: s_mov_b32 s6, -1
-; GFX6-NEXT: s_mov_b32 s7, s6
-; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
+; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
; GFX6-NEXT: ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index ed85fb19d9051..43322b1e23412 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -118,13 +118,11 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX7-NEXT: s_mov_b32 s8, -1
; GFX7-NEXT: s_or_b32 s0, s1, s0
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
; GFX7-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX7-NEXT: s_mov_b32 s9, s8
; GFX7-NEXT: s_or_b32 s1, s1, s2
-; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[8:9]
+; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], -1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: scalar_xnor_v4i16_one_use:
diff --git a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
index f6fc69a6e3e47..ea93e3ac1e595 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
@@ -5,16 +5,14 @@ define amdgpu_cs <2 x i32> @f() {
; CHECK-LABEL: f:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_mov_b32 s4, 0
+; CHECK-NEXT: s_mov_b32 s1, 0
; CHECK-NEXT: s_mov_b32 s5, s4
; CHECK-NEXT: s_mov_b32 s6, s4
; CHECK-NEXT: s_mov_b32 s7, s4
-; CHECK-NEXT: s_mov_b32 s0, s4
; CHECK-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
-; CHECK-NEXT: s_mov_b32 s1, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1]
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
; CHECK-NEXT: v_mov_b32_e32 v1, s4
-; CHECK-NEXT: s_mov_b32 s1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
index 4011c21af6904..661af021e8a84 100644
--- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
+++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
@@ -192,10 +192,8 @@ define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b)
;
; GISEL-LABEL: s_csh_v4i32:
; GISEL: ; %bb.0:
-; GISEL-NEXT: s_mov_b32 s8, 31
-; GISEL-NEXT: s_mov_b32 s9, s8
-; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
-; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9]
+; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], 31
+; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], 31
; GISEL-NEXT: s_lshl_b32 s8, s0, s4
; GISEL-NEXT: s_lshl_b32 s9, s1, s5
; GISEL-NEXT: s_lshl_b32 s10, s2, s6
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
index 08693ec9db1d4..aeca4398f9a83 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir
@@ -13,7 +13,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, [[COPY1]], implicit $mode
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAC_F32 1056964608, [[COPY]], [[COPY1]], implicit $mode
; CHECK-NEXT: $sgpr0 = COPY %fma
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = COPY $sgpr1
@@ -33,7 +33,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, [[COPY1]], implicit $mode
+ ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAC_F32 [[COPY]], 1056964608, [[COPY1]], implicit $mode
; CHECK-NEXT: $sgpr0 = COPY %fma
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = COPY $sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
index 492a30b67089c..bc49f70cbee11 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
@@ -742,10 +742,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_0x100000001(ptr addrspace(1)
;
; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_0x100000001:
; GFX12-SDAG: ; %bb.0:
-; GFX12-SDAG-NEXT: s_mov_b32 s0, 1
-; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-SDAG-NEXT: s_mov_b32 s1, s0
-; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
+; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], 1
; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
index b59f3c0d410f8..9b03a72fd826d 100644
--- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
@@ -87,7 +87,7 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
; GCN-LABEL: {{^}}fadd_v2_v_lit_splat:
; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 1.0 op_sel_hi:[1,0]{{$}}
-; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
+; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 1.0{{$}}
define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
@@ -308,7 +308,7 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
; GCN-LABEL: {{^}}fmul_v2_v_lit_splat:
; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0 op_sel_hi:[1,0]{{$}}
-; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
+; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0{{$}}
define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
@@ -432,7 +432,7 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
; GCN-LABEL: {{^}}fma_v2_v_lit_splat:
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0
; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0, 1.0 op_sel_hi:[1,0,0]{{$}}
-; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
+; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0, 1.0{{$}}
define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
@@ -556,8 +556,8 @@ bb:
; PACKED-SDAG: v_add_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0
; PACKED-SDAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
-; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
-; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
+; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0{{$}}
+; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0{{$}}
define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
bb:
%i12 = fadd <2 x float> zeroinitializer, %arg
diff --git a/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll b/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
index 81d792183dc06..debbfce7dadcc 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
@@ -218,7 +218,7 @@ define amdgpu_ps float @_amdgpu_ps_main() {
; GFX1150-NEXT: s_mov_b32 s3, s0
; GFX1150-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x0
; GFX1150-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1150-NEXT: s_fmamk_f32 s0, s1, 0x40800000, s0
+; GFX1150-NEXT: s_fmac_f32 s0, s1, 4.0
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-NEXT: v_mov_b32_e32 v0, s0
; GFX1150-NEXT: ; return to shader part epilog
@@ -232,7 +232,7 @@ define amdgpu_ps float @_amdgpu_ps_main() {
; GFX12-NEXT: s_mov_b32 s3, s0
; GFX12-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_fmamk_f32 s0, s1, 0x40800000, s0
+; GFX12-NEXT: s_fmac_f32 s0, s1, 4.0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog