[llvm] fix vgprimm for t16 (PR #131021)

Brox Chen via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 13 11:20:40 PDT 2025


https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/131021

From 3a6d39951f4afbc05110d22efc47e901d7386363 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 12 Mar 2025 16:14:58 -0400
Subject: [PATCH] fix vgprimm for t16

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.td    |   5 +
 llvm/lib/Target/AMDGPU/SIInstructions.td |   2 +-
 llvm/test/CodeGen/AMDGPU/br_cc.f16.ll    | 207 ++++++++++++++++-------
 3 files changed, 148 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 403c657c64053..23a7f508dcda2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -846,6 +846,11 @@ def cond_as_i32imm: SDNodeXForm<cond, [{
 }]>;
 
 // Copied from the AArch64 backend:
+def bitcast_fpimm_to_i16 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i16);
+}]>;
+
 def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
 return CurDAG->getTargetConstant(
   N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index de77401eb0137..37ce5d548dfed 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2283,7 +2283,7 @@ let True16Predicate = UseRealTrue16Insts in {
   foreach vt = [f16, bf16] in {
     def : GCNPat <
       (VGPRImm<(vt fpimm)>:$imm),
-      (V_MOV_B16_t16_e64 0, $imm, 0)
+      (V_MOV_B16_t16_e64 0, (vt (bitcast_fpimm_to_i16 $imm)), 0)
     >;
   }
 }
diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
index 98832aaa3bc25..aa9a32f429be1 100644
--- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
 
 define amdgpu_kernel void @br_cc_f16(
 ; SI-LABEL: br_cc_f16:
@@ -60,32 +61,62 @@ define amdgpu_kernel void @br_cc_f16(
 ; VI-NEXT:    buffer_store_short v1, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: br_cc_f16:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_clause 0x1
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_load_b64 s[8:9], s[4:5], 0x34
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s10, s6
-; GFX11-NEXT:    s_mov_b32 s11, s7
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s4, s2
-; GFX11-NEXT:    s_mov_b32 s5, s3
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[8:11], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s2, s6
-; GFX11-NEXT:    s_mov_b32 s3, s7
-; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_cbranch_vccnz .LBB0_2
-; GFX11-NEXT:  ; %bb.1: ; %one
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT:    s_endpgm
-; GFX11-NEXT:  .LBB0_2: ; %two
-; GFX11-NEXT:    buffer_store_b16 v1, off, s[0:3], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: br_cc_f16:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_clause 0x1
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_load_b64 s[8:9], s[4:5], 0x34
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s3
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[8:11], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v2.l, v2.h
+; GFX11-TRUE16-NEXT:    s_cbranch_vccnz .LBB0_2
+; GFX11-TRUE16-NEXT:  ; %bb.1: ; %one
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+; GFX11-TRUE16-NEXT:  .LBB0_2: ; %two
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v1, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: br_cc_f16:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_load_b64 s[8:9], s[4:5], 0x34
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s3
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[8:11], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-FAKE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_cbranch_vccnz .LBB0_2
+; GFX11-FAKE16-NEXT:  ; %bb.1: ; %one
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
+; GFX11-FAKE16-NEXT:  .LBB0_2: ; %two
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v1, off, s[0:3], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) {
@@ -151,25 +182,47 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: br_cc_f16_imm_a:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s4, s2
-; GFX11-NEXT:    s_mov_b32 s5, s3
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
-; GFX11-NEXT:    s_cbranch_vccnz .LBB1_2
-; GFX11-NEXT:  ; %bb.1: ; %one
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800
-; GFX11-NEXT:  .LBB1_2: ; %two
-; GFX11-NEXT:    s_mov_b32 s2, s6
-; GFX11-NEXT:    s_mov_b32 s3, s7
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: br_cc_f16_imm_a:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s3
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0.5, v1.l
+; GFX11-TRUE16-NEXT:    s_cbranch_vccnz .LBB1_2
+; GFX11-TRUE16-NEXT:  ; %bb.1: ; %one
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x3800
+; GFX11-TRUE16-NEXT:  .LBB1_2: ; %two
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: br_cc_f16_imm_a:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s3
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT:    s_cbranch_vccnz .LBB1_2
+; GFX11-FAKE16-NEXT:  ; %bb.1: ; %one
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x3800
+; GFX11-FAKE16-NEXT:  .LBB1_2: ; %two
+; GFX11-FAKE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %b) {
 entry:
@@ -235,25 +288,47 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: br_cc_f16_imm_b:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s4, s2
-; GFX11-NEXT:    s_mov_b32 s5, s3
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
-; GFX11-NEXT:    s_cbranch_vccz .LBB2_2
-; GFX11-NEXT:  ; %bb.1: ; %two
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800
-; GFX11-NEXT:  .LBB2_2: ; %one
-; GFX11-NEXT:    s_mov_b32 s2, s6
-; GFX11-NEXT:    s_mov_b32 s3, s7
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: br_cc_f16_imm_b:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s3
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, 0.5, v1.l
+; GFX11-TRUE16-NEXT:    s_cbranch_vccz .LBB2_2
+; GFX11-TRUE16-NEXT:  ; %bb.1: ; %two
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x3800
+; GFX11-TRUE16-NEXT:  .LBB2_2: ; %one
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: br_cc_f16_imm_b:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s3
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT:    s_cbranch_vccz .LBB2_2
+; GFX11-FAKE16-NEXT:  ; %bb.1: ; %two
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x3800
+; GFX11-FAKE16-NEXT:  .LBB2_2: ; %one
+; GFX11-FAKE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a) {
 entry:
@@ -269,3 +344,5 @@ two:
   store half 0xH3800, ptr addrspace(1) %r
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}



More information about the llvm-commits mailing list