[llvm] 2484f4a - [AMDGPU][True16][CodeGen] Avoid setting hi part in copysign (#160891)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 11:43:02 PDT 2025
Author: Piotr Sobczak
Date: 2025-09-26T20:42:59+02:00
New Revision: 2484f4a03755b514239075f2ed4e0c8128f66c88
URL: https://github.com/llvm/llvm-project/commit/2484f4a03755b514239075f2ed4e0c8128f66c88
DIFF: https://github.com/llvm/llvm-project/commit/2484f4a03755b514239075f2ed4e0c8128f66c88.diff
LOG: [AMDGPU][True16][CodeGen] Avoid setting hi part in copysign (#160891)
This is a temporary fix for a regression from #154875.
The new pattern sets the hi part of V_BFI result and that confuses
si-fix-sgpr-copies - where the proper fix is likely to be.
During si-fix-sgpr-copies, an incorrect fold happens:
%86:vgpr_32 = V_BFI_B32_e64
%87:sreg_32 = COPY %86.hi16:vgpr_32
%95:vgpr_32 = nofpexcept V_PACK_B32_F16_t16_e64 0, killed %87:sreg_32,
0, %63:vgpr_16, 0, 0
into
%86:vgpr_32 = V_BFI_B32_e64
%95:vgpr_32 = nofpexcept V_PACK_B32_F16_t16_e64 0, %86.lo16:vgpr_32, 0,
%63:vgpr_16, 0, 0
Fixes: Vulkan CTS dEQP-VK.glsl.builtin.precision_fp16_storage32b.*.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll
llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index e1f1788726ad7..59fd2f10ccacd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2287,8 +2287,9 @@ def : GCNPat <
def : GCNPat <
(fcopysign fp16vt:$src0, f32:$src1),
- (EXTRACT_SUBREG (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fff0000)),
- (REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16, $src0, hi16), $src1), hi16)
+ (EXTRACT_SUBREG (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)),
+ (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16),
+ (V_LSHRREV_B32_e64 (i32 16), $src1)), lo16)
>;
def : GCNPat <
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll
index 57cea74b0e9ea..30bcdf97e26fd 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.bf16.ll
@@ -231,22 +231,13 @@ define bfloat @v_copysign_bf16_f32(bfloat %mag, float %sign.f32) {
; GFX10-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11TRUE16-LABEL: v_copysign_bf16_f32:
-; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v1
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11FAKE16-LABEL: v_copysign_bf16_f32:
-; GFX11FAKE16: ; %bb.0:
-; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
-; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_copysign_bf16_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%sign = fptrunc float %sign.f32 to bfloat
%op = call bfloat @llvm.copysign.bf16(bfloat %mag, bfloat %sign)
ret bfloat %op
@@ -298,22 +289,13 @@ define bfloat @v_copysign_bf16_f64(bfloat %mag, double %sign.f64) {
; GFX10-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11TRUE16-LABEL: v_copysign_bf16_f64:
-; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v2
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11FAKE16-LABEL: v_copysign_bf16_f64:
-; GFX11FAKE16: ; %bb.0:
-; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
-; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
-; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_copysign_bf16_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%sign = fptrunc double %sign.f64 to bfloat
%op = call bfloat @llvm.copysign.bf16(bfloat %mag, bfloat %sign)
ret bfloat %op
@@ -499,9 +481,10 @@ define amdgpu_ps i32 @s_copysign_bf16_f32(bfloat inreg %mag, float inreg %sign.f
;
; GFX11TRUE16-LABEL: s_copysign_bf16_f32:
; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
@@ -575,9 +558,10 @@ define amdgpu_ps i32 @s_copysign_bf16_f64(bfloat inreg %mag, double inreg %sign.
;
; GFX11TRUE16-LABEL: s_copysign_bf16_f64:
; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s2
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
@@ -3677,9 +3661,10 @@ define amdgpu_ps i16 @s_copysign_out_bf16_mag_bf16_sign_f32(bfloat inreg %mag, f
;
; GFX11TRUE16-LABEL: s_copysign_out_bf16_mag_bf16_sign_f32:
; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s1
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
@@ -3744,9 +3729,10 @@ define amdgpu_ps i16 @s_copysign_out_bf16_mag_bf16_sign_f64(bfloat inreg %mag, d
;
; GFX11TRUE16-LABEL: s_copysign_out_bf16_mag_bf16_sign_f64:
; GFX11TRUE16: ; %bb.0:
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s2
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11TRUE16-NEXT: ; return to shader part epilog
;
@@ -6700,15 +6686,16 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64(<3 x bfloat> %m
; GFX11TRUE16-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
-; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v5
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v5
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v4
+; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v7
+; GFX11TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v3
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff0000, v1, v3
-; GFX11TRUE16-NEXT: v_bfi_b32 v2, 0x7fff0000, v2, v7
-; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
-; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
+; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX11TRUE16-NEXT: v_bfi_b32 v1, 0x7fff, v1, v4
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64:
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
index fc33ae277df70..62847b15d3443 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -776,22 +776,13 @@ define half @v_copysign_out_f16_mag_f16_sign_f32(half %mag, float %sign) {
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_copysign_out_f16_mag_f16_sign_f32:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v1
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_copysign_out_f16_mag_f16_sign_f32:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_copysign_out_f16_mag_f16_sign_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%sign.trunc = fptrunc float %sign to half
%out = call half @llvm.copysign.f16(half %mag, half %sign.trunc)
ret half %out
@@ -823,22 +814,13 @@ define half @v_copysign_out_f16_mag_f16_sign_f64(half %mag, double %sign) {
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-TRUE16-LABEL: v_copysign_out_f16_mag_f16_sign_f64:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v2
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-FAKE16-LABEL: v_copysign_out_f16_mag_f16_sign_f64:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
-; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_copysign_out_f16_mag_f16_sign_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%sign.trunc = fptrunc double %sign to half
%out = call half @llvm.copysign.f16(half %mag, half %sign.trunc)
ret half %out
@@ -2832,9 +2814,10 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f16_sign_f32(half inreg %mag, float
;
; GFX11-TRUE16-LABEL: s_copysign_out_f16_mag_f16_sign_f32:
; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s1
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -2883,9 +2866,10 @@ define amdgpu_ps i16 @s_copysign_out_f16_mag_f16_sign_f64(half inreg %mag, doubl
;
; GFX11-TRUE16-LABEL: s_copysign_out_f16_mag_f16_sign_f64:
; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e64 v1, 16, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, s2
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v1
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@@ -5590,15 +5574,16 @@ define <3 x half> @v_copysign_out_v3f16_mag_v3f16_sign_v3f64(<3 x half> %mag, <3
; GFX11-TRUE16-LABEL: v_copysign_out_v3f16_mag_v3f16_sign_v3f64:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
-; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff0000, v0, v5
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v5
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v4
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v7
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0x7fff, v0, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fff0000, v1, v3
-; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff0000, v2, v7
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.h
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.h
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX11-TRUE16-NEXT: v_bfi_b32 v1, 0x7fff, v1, v4
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_copysign_out_v3f16_mag_v3f16_sign_v3f64:
More information about the llvm-commits
mailing list