[llvm] [AMDGPU][True16][GlobalISel] Fix v2*16 build_vector patterns (PR #151496)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 04:02:28 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
<details>
<summary>Changes</summary>
- Pattern with IMPLICIT_DEF failed to generate an entry in MatchTable and
did not report an error, it just silently failed. This is fixed by casting
IMPLICIT_DEF to appropriate type. This also fixes selecting
"build_vector s16, undef" for GlobalISel with True16.
- Add pattern for "build_vector undef, s16" that will work for GlobalISel.
True16 GlobalISel has a G_TRUNC that it needs to deal with.
- Use REG_SEQUENCE for Real16 patterns instead of V_LSHLREV_B32_e64 to
generate more optimal code.
---
Patch is 49.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151496.diff
10 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+17-3)
- (modified) llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll (+643-150)
- (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll (+1-3)
- (modified) llvm/test/CodeGen/AMDGPU/fmaximum.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/fminimum.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/global-saddr-load.ll (+2-4)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll (+2-3)
- (modified) llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll (+6-6)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 54fa192aeec92..35605eb5a4bd3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3543,15 +3543,29 @@ def : GCNPat <
(vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))),
(S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
-}
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
(vecTy (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
>;
-} // End foreach Ty = ...
}
+let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <
+ (vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
+ (REG_SEQUENCE VGPR_32, (Ty (IMPLICIT_DEF)), lo16, (Ty VGPR_32:$src1), hi16)
+>;
+
+} // End foreach Ty = ...
+} // End AddedComplexity = 1
+
+let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <
+ (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))),
+ (REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16,
+ (i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)), hi16)
+>;
+
let SubtargetPredicate = HasVOP3PInsts in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in
@@ -3599,7 +3613,7 @@ def : GCNPat <
>;
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_16:$src0), (Ty undef))),
- (REG_SEQUENCE VGPR_32, $src0, lo16, (IMPLICIT_DEF), hi16)
+ (REG_SEQUENCE VGPR_32, $src0, lo16, (Ty (IMPLICIT_DEF)), hi16)
>;
}
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
index bdb52dbb95fa7..8b82d79a47a1a 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -1,8 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE,GFX11-FAKE16-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE,GFX11-FAKE16-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE,GFX11-TRUE16-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE,GFX11-TRUE16-GISEL %s
define void @undef_lo_v2i16(i16 %arg0) {
+; GFX8-SDAG-LABEL: undef_lo_v2i16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-SDAG-NEXT: ;;#ASMSTART
+; GFX8-SDAG-NEXT: ; use v0
+; GFX8-SDAG-NEXT: ;;#ASMEND
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo_v2i16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-GISEL-NEXT: ;;#ASMSTART
+; GFX8-GISEL-NEXT: ; use v0
+; GFX8-GISEL-NEXT: ;;#ASMEND
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: undef_lo_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12,20 +37,48 @@ define void @undef_lo_v2i16(i16 %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: undef_lo_v2i16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use v0
-; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE-LABEL: undef_lo_v2i16:
+; GFX11-FAKE: ; %bb.0:
+; GFX11-FAKE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE-NEXT: ;;#ASMSTART
+; GFX11-FAKE-NEXT: ; use v0
+; GFX11-FAKE-NEXT: ;;#ASMEND
+; GFX11-FAKE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE-LABEL: undef_lo_v2i16:
+; GFX11-TRUE: ; %bb.0:
+; GFX11-TRUE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE-NEXT: ;;#ASMSTART
+; GFX11-TRUE-NEXT: ; use v0
+; GFX11-TRUE-NEXT: ;;#ASMEND
+; GFX11-TRUE-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x i16> poison, i16 %arg0, i32 1
call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.lo);
ret void
}
define void @undef_lo_v2f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_lo_v2f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-SDAG-NEXT: ;;#ASMSTART
+; GFX8-SDAG-NEXT: ; use v0
+; GFX8-SDAG-NEXT: ;;#ASMEND
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo_v2f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-GISEL-NEXT: ;;#ASMSTART
+; GFX8-GISEL-NEXT: ; use v0
+; GFX8-GISEL-NEXT: ;;#ASMEND
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: undef_lo_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -35,20 +88,52 @@ define void @undef_lo_v2f16(half %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: undef_lo_v2f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use v0
-; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE-LABEL: undef_lo_v2f16:
+; GFX11-FAKE: ; %bb.0:
+; GFX11-FAKE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE-NEXT: ;;#ASMSTART
+; GFX11-FAKE-NEXT: ; use v0
+; GFX11-FAKE-NEXT: ;;#ASMEND
+; GFX11-FAKE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE-LABEL: undef_lo_v2f16:
+; GFX11-TRUE: ; %bb.0:
+; GFX11-TRUE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE-NEXT: ;;#ASMSTART
+; GFX11-TRUE-NEXT: ; use v0
+; GFX11-TRUE-NEXT: ;;#ASMEND
+; GFX11-TRUE-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x half> poison, half %arg0, i32 1
call void asm sideeffect "; use $0", "v"(<2 x half> %undef.lo);
ret void
}
define void @undef_lo_op_v2f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_lo_op_v2f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GFX8-SDAG-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, 0x7e00, v0
+; GFX8-SDAG-NEXT: ;;#ASMSTART
+; GFX8-SDAG-NEXT: ; use v0
+; GFX8-SDAG-NEXT: ;;#ASMEND
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo_op_v2f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
+; GFX8-GISEL-NEXT: v_add_f16_e64 v1, s4, 1.0
+; GFX8-GISEL-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX8-GISEL-NEXT: ;;#ASMSTART
+; GFX8-GISEL-NEXT: ; use v0
+; GFX8-GISEL-NEXT: ;;#ASMEND
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: undef_lo_op_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -59,16 +144,27 @@ define void @undef_lo_op_v2f16(half %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: undef_lo_op_v2f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00
-; GFX8-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, 0x7e00, v0
-; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use v0
-; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE-LABEL: undef_lo_op_v2f16:
+; GFX11-FAKE: ; %bb.0:
+; GFX11-FAKE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX11-FAKE-NEXT: ;;#ASMSTART
+; GFX11-FAKE-NEXT: ; use v0
+; GFX11-FAKE-NEXT: ;;#ASMEND
+; GFX11-FAKE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE-LABEL: undef_lo_op_v2f16:
+; GFX11-TRUE: ; %bb.0:
+; GFX11-TRUE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX11-TRUE-NEXT: ;;#ASMSTART
+; GFX11-TRUE-NEXT: ; use v0
+; GFX11-TRUE-NEXT: ;;#ASMEND
+; GFX11-TRUE-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x half> poison, half %arg0, i32 1
%op = fadd <2 x half> %undef.lo, <half 1.0, half 1.0>
call void asm sideeffect "; use $0", "v"(<2 x half> %op);
@@ -76,26 +172,93 @@ define void @undef_lo_op_v2f16(half %arg0) {
}
define void @undef_lo_op_v2i16(i16 %arg0) {
-; GFX9-LABEL: undef_lo_op_v2i16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_movk_i32 s4, 0x63
-; GFX9-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use v0
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX8-SDAG-LABEL: undef_lo_op_v2i16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT: ;;#ASMSTART
+; GFX8-SDAG-NEXT: ; use v0
+; GFX8-SDAG-NEXT: ;;#ASMEND
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: undef_lo_op_v2i16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x63
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use v0
-; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX8-GISEL-LABEL: undef_lo_op_v2i16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x63
+; GFX8-GISEL-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX8-GISEL-NEXT: ;;#ASMSTART
+; GFX8-GISEL-NEXT: ; use v0
+; GFX8-GISEL-NEXT: ;;#ASMEND
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: undef_lo_op_v2i16:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x63
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: ;;#ASMSTART
+; GFX9-SDAG-NEXT: ; use v0
+; GFX9-SDAG-NEXT: ;;#ASMEND
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: undef_lo_op_v2i16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x630063
+; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-GISEL-NEXT: ;;#ASMSTART
+; GFX9-GISEL-NEXT: ; use v0
+; GFX9-GISEL-NEXT: ;;#ASMEND
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: undef_lo_op_v2i16:
+; GFX11-FAKE16-SDAG: ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT: v_pk_add_u16 v0, 0x63, v0 op_sel_hi:[0,1]
+; GFX11-FAKE16-SDAG-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-SDAG-NEXT: ; use v0
+; GFX11-FAKE16-SDAG-NEXT: ;;#ASMEND
+; GFX11-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: undef_lo_op_v2i16:
+; GFX11-FAKE16-GISEL: ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT: v_pk_add_u16 v0, 0x630063, v0
+; GFX11-FAKE16-GISEL-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-GISEL-NEXT: ; use v0
+; GFX11-FAKE16-GISEL-NEXT: ;;#ASMEND
+; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_lo_op_v2i16:
+; GFX11-TRUE16-SDAG: ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT: v_pk_add_u16 v0, 0x63, v0 op_sel_hi:[0,1]
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT: ; use v0
+; GFX11-TRUE16-SDAG-NEXT: ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_lo_op_v2i16:
+; GFX11-TRUE16-GISEL: ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT: v_pk_add_u16 v0, 0x630063, v0
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT: ; use v0
+; GFX11-TRUE16-GISEL-NEXT: ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <2 x i16> poison, i16 %arg0, i32 1
%op = add <2 x i16> %undef.lo, <i16 99, i16 99>
call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
@@ -103,6 +266,26 @@ define void @undef_lo_op_v2i16(i16 %arg0) {
}
define void @undef_lo3_v4i16(i16 %arg0) {
+; GFX8-SDAG-LABEL: undef_lo3_v4i16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-SDAG-NEXT: ;;#ASMSTART
+; GFX8-SDAG-NEXT: ; use v[0:1]
+; GFX8-SDAG-NEXT: ;;#ASMEND
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo3_v4i16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT: ;;#ASMSTART
+; GFX8-GISEL-NEXT: ; use v[0:1]
+; GFX8-GISEL-NEXT: ;;#ASMEND
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: undef_lo3_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -112,20 +295,49 @@ define void @undef_lo3_v4i16(i16 %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: undef_lo3_v4i16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use v[0:1]
-; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE-LABEL: undef_lo3_v4i16:
+; GFX11-FAKE: ; %bb.0:
+; GFX11-FAKE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE-NEXT: ;;#ASMSTART
+; GFX11-FAKE-NEXT: ; use v[0:1]
+; GFX11-FAKE-NEXT: ;;#ASMEND
+; GFX11-FAKE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE-LABEL: undef_lo3_v4i16:
+; GFX11-TRUE: ; %bb.0:
+; GFX11-TRUE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE-NEXT: ;;#ASMSTART
+; GFX11-TRUE-NEXT: ; use v[0:1]
+; GFX11-TRUE-NEXT: ;;#ASMEND
+; GFX11-TRUE-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <4 x i16> poison, i16 %arg0, i32 1
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
ret void
}
define void @undef_lo3_v4f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_lo3_v4f16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-SDAG-NEXT: ;;#ASMSTART
+; GFX8-SDAG-NEXT: ; use v[0:1]
+; GFX8-SDAG-NEXT: ;;#ASMEND
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo3_v4f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT: ;;#ASMSTART
+; GFX8-GISEL-NEXT: ; use v[0:1]
+; GFX8-GISEL-NEXT: ;;#ASMEND
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: undef_lo3_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -135,20 +347,50 @@ define void @undef_lo3_v4f16(half %arg0) {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: undef_lo3_v4f16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: ;;#ASMSTART
-; GFX8-NEXT: ; use v[0:1]
-; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX11-FAKE-LABEL: undef_lo3_v4f16:
+; GFX11-FAKE: ; %bb.0:
+; GFX11-FAKE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE-NEXT: ;;#ASMSTART
+; GFX11-FAKE-NEXT: ; use v[0:1]
+; GFX11-FAKE-NEXT: ;;#ASMEND
+; GFX11-FAKE-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE-LABEL: undef_lo3_v4f16:
+; GFX11-TRUE: ; %bb.0:
+; GFX11-TRUE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE-NEXT: ;;#ASMSTART
+; GFX11-TRUE-NEXT: ; use v[0:1]
+; GFX11-TRUE-NEXT: ;;#ASMEND
+; GFX11-TRUE-NEXT: s_setpc_b64 s[30:31]
%undef.lo = insertelement <4 x half> poison, half %arg0, i32 1
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
ret void
}
define void @undef_lo2_v4i16(<2 x i16> %arg0) {
+; GFX8-SDAG-LABEL: undef_lo2_v4i16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-SDAG-NEXT: v_alignbit_b32 v0, v1, v0, 16
+; GFX8-SDAG-NEXT: ;;#ASMSTART
+; GFX8-SDAG-NEXT: ; use v[0:1]
+; GFX8-SDAG-NEXT: ;;#ASMEND
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo2_v4i16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-GISEL-NEXT: v_alignbit_b32 v0, v1, v0, 16
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT: ;;#ASMSTART
+; GFX8-GISEL-NEXT: ; use v[0:1]
+; GFX8-GISEL-NEXT: ;;#ASM...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/151496
More information about the llvm-commits
mailing list