[llvm] fix vgprimm for t16 (PR #131021)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 13 11:20:40 PDT 2025
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/131021
From 3a6d39951f4afbc05110d22efc47e901d7386363 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 12 Mar 2025 16:14:58 -0400
Subject: [PATCH] fix vgprimm for t16
---
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 5 +
llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +-
llvm/test/CodeGen/AMDGPU/br_cc.f16.ll | 207 ++++++++++++++++-------
3 files changed, 148 insertions(+), 66 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 403c657c64053..23a7f508dcda2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -846,6 +846,11 @@ def cond_as_i32imm: SDNodeXForm<cond, [{
}]>;
// Copied from the AArch64 backend:
+def bitcast_fpimm_to_i16 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i16);
+}]>;
+
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index de77401eb0137..37ce5d548dfed 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2283,7 +2283,7 @@ let True16Predicate = UseRealTrue16Insts in {
foreach vt = [f16, bf16] in {
def : GCNPat <
(VGPRImm<(vt fpimm)>:$imm),
- (V_MOV_B16_t16_e64 0, $imm, 0)
+ (V_MOV_B16_t16_e64 0, (vt (bitcast_fpimm_to_i16 $imm)), 0)
>;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
index 98832aaa3bc25..aa9a32f429be1 100644
--- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
define amdgpu_kernel void @br_cc_f16(
; SI-LABEL: br_cc_f16:
@@ -60,32 +61,62 @@ define amdgpu_kernel void @br_cc_f16(
; VI-NEXT: buffer_store_short v1, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: br_cc_f16:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
-; GFX11-NEXT: s_mov_b32 s6, -1
-; GFX11-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-NEXT: s_mov_b32 s10, s6
-; GFX11-NEXT: s_mov_b32 s11, s7
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s4, s2
-; GFX11-NEXT: s_mov_b32 s5, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: s_mov_b32 s2, s6
-; GFX11-NEXT: s_mov_b32 s3, s7
-; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT: s_cbranch_vccnz .LBB0_2
-; GFX11-NEXT: ; %bb.1: ; %one
-; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT: s_endpgm
-; GFX11-NEXT: .LBB0_2: ; %two
-; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: br_cc_f16:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
+; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v2.l, v2.h
+; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB0_2
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %one
+; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+; GFX11-TRUE16-NEXT: .LBB0_2: ; %two
+; GFX11-TRUE16-NEXT: buffer_store_b16 v1, off, s[0:3], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: br_cc_f16:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
+; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
+; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB0_2
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %one
+; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-FAKE16-NEXT: s_endpgm
+; GFX11-FAKE16-NEXT: .LBB0_2: ; %two
+; GFX11-FAKE16-NEXT: buffer_store_b16 v1, off, s[0:3], 0
+; GFX11-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) {
@@ -151,25 +182,47 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: br_cc_f16_imm_a:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, -1
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s4, s2
-; GFX11-NEXT: s_mov_b32 s5, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
-; GFX11-NEXT: s_cbranch_vccnz .LBB1_2
-; GFX11-NEXT: ; %bb.1: ; %one
-; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
-; GFX11-NEXT: .LBB1_2: ; %two
-; GFX11-NEXT: s_mov_b32 s2, s6
-; GFX11-NEXT: s_mov_b32 s3, s7
-; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: br_cc_f16_imm_a:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v1.l
+; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB1_2
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %one
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
+; GFX11-TRUE16-NEXT: .LBB1_2: ; %two
+; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
+; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
+; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: br_cc_f16_imm_a:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB1_2
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %one
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800
+; GFX11-FAKE16-NEXT: .LBB1_2: ; %two
+; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
+; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
+; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %b) {
entry:
@@ -235,25 +288,47 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
-; GFX11-LABEL: br_cc_f16_imm_b:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-NEXT: s_mov_b32 s6, -1
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s4, s2
-; GFX11-NEXT: s_mov_b32 s5, s3
-; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
-; GFX11-NEXT: s_cbranch_vccz .LBB2_2
-; GFX11-NEXT: ; %bb.1: ; %two
-; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
-; GFX11-NEXT: .LBB2_2: ; %one
-; GFX11-NEXT: s_mov_b32 s2, s6
-; GFX11-NEXT: s_mov_b32 s3, s7
-; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: br_cc_f16_imm_b:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
+; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v1.l
+; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB2_2
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %two
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
+; GFX11-TRUE16-NEXT: .LBB2_2: ; %one
+; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
+; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
+; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: br_cc_f16_imm_b:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
+; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
+; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB2_2
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %two
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800
+; GFX11-FAKE16-NEXT: .LBB2_2: ; %one
+; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
+; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
+; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
@@ -269,3 +344,5 @@ two:
store half 0xH3800, ptr addrspace(1) %r
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
More information about the llvm-commits mailing list