[llvm] [AMDGPU][ISel] Set trunc store action to expand for v4f32->v4bf16 (PR #90427)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 28 20:12:32 PDT 2024
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/90427
None
>From f5e82bcb6ab26b5e1a6b5fb498acd44f3480ed63 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sun, 28 Apr 2024 23:09:33 -0400
Subject: [PATCH] [AMDGPU][ISel] Set trunc store action to expand for
v4f32->v4bf16
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 +
.../CodeGen/AMDGPU/fp_trunc_store_bf16.ll | 39 +++++++++++++++++++
2 files changed, 40 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fp_trunc_store_bf16.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 7993b63121110c..c49d6c63cb3f82 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -316,6 +316,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
+ setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Expand);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
diff --git a/llvm/test/CodeGen/AMDGPU/fp_trunc_store_bf16.ll b/llvm/test/CodeGen/AMDGPU/fp_trunc_store_bf16.ll
new file mode 100644
index 00000000000000..ae92bf8a5f17ac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fp_trunc_store_bf16.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
+
+define void @square(<4 x float> %num, ptr addrspace(1) %p) {
+; CHECK-LABEL: square:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_bfe_u32 v6, v2, 16, 1
+; CHECK-NEXT: s_movk_i32 s4, 0x7fff
+; CHECK-NEXT: v_add3_u32 v6, v6, v2, s4
+; CHECK-NEXT: v_or_b32_e32 v7, 0x400000, v2
+; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v7, vcc
+; CHECK-NEXT: v_bfe_u32 v6, v3, 16, 1
+; CHECK-NEXT: v_add3_u32 v6, v6, v3, s4
+; CHECK-NEXT: v_or_b32_e32 v7, 0x400000, v3
+; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v7, vcc
+; CHECK-NEXT: s_mov_b32 s5, 0x7060302
+; CHECK-NEXT: v_perm_b32 v3, v3, v2, s5
+; CHECK-NEXT: v_bfe_u32 v2, v0, 16, 1
+; CHECK-NEXT: v_add3_u32 v2, v2, v0, s4
+; CHECK-NEXT: v_or_b32_e32 v6, 0x400000, v0
+; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; CHECK-NEXT: v_bfe_u32 v2, v1, 16, 1
+; CHECK-NEXT: v_add3_u32 v2, v2, v1, s4
+; CHECK-NEXT: v_or_b32_e32 v6, 0x400000, v1
+; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v6, vcc
+; CHECK-NEXT: v_perm_b32 v2, v1, v0, s5
+; CHECK-NEXT: global_store_dwordx2 v[4:5], v[2:3], off
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %conv = fptrunc <4 x float> %num to <4 x bfloat>
+ store <4 x bfloat> %conv, ptr addrspace(1) %p, align 8
+ ret void
+}
More information about the llvm-commits
mailing list