[llvm] gisel lower fp64 to fp16 (PR #128911)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 19:54:06 PST 2025
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/128911
>From 0415cdb9681ca8e68d6c924f2e7a362044a02f0d Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 26 Feb 2025 12:00:48 -0500
Subject: [PATCH] fptrunc in true16
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 13 +-
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 11 +
llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll | 990 ++++++++++++------
llvm/test/CodeGen/AMDGPU/fptrunc.ll | 76 +-
5 files changed, 738 insertions(+), 355 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ade81f17ecca5..dca2983f94be8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3578,15 +3578,22 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) con
return SDValue();
}
- assert(N0.getSimpleValueType() == MVT::f64);
+ return LowerF64ToF16(N0, Op.getValueType(), DL, DAG);
+}
+
+SDValue AMDGPUTargetLowering::LowerF64ToF16(SDValue Src, EVT ResTy,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ assert(Src.getSimpleValueType() == MVT::f64);
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
+ // TODO: We can generate better code for True16.
const unsigned ExpMask = 0x7ff;
const unsigned ExpBiasf64 = 1023;
const unsigned ExpBiasf16 = 15;
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
- SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
+ SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Src);
SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
DAG.getConstant(32, DL, MVT::i64));
UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
@@ -3661,7 +3668,7 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) con
DAG.getConstant(0x8000, DL, MVT::i32));
V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
- return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
+ return DAG.getZExtOrTrunc(V, DL, ResTy);
}
SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c74dc7942f52c..7b73b7b6a7127 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -97,6 +97,9 @@ class AMDGPUTargetLowering : public TargetLowering {
SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerF64ToF16(SDValue Src, EVT ResTy, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
protected:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index fe095414e5172..1c9c8422e2257 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6825,6 +6825,17 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
+ if (Subtarget->useRealTrue16Insts()) {
+ if (getTargetMachine().Options.UnsafeFPMath) {
+ SDValue Flags = Op.getOperand(1);
+ SDValue Src32 = DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Src, Flags);
+ return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Src32, Flags);
+ }
+
+ SDValue FpToFp16 = LowerF64ToF16(Src, MVT::i16, DL, DAG);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f16, FpToFp16);
+ }
+
SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 188b2ada64686..9057c81cbd555 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -7,8 +7,10 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
define amdgpu_kernel void @fptrunc_f32_to_f16(
; SI-SDAG-LABEL: fptrunc_f32_to_f16:
@@ -131,35 +133,65 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fptrunc_f32_to_f16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fptrunc_f32_to_f16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, s2
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
@@ -298,39 +330,73 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fptrunc_f64_to_f16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
@@ -477,41 +543,77 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s3
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b64 v[1:2], off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v2
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, s2
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, s3
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, s2
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, s3
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
@@ -674,48 +776,90 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
-; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v1, v[0:1]
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v2
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-FAKE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
@@ -846,35 +990,65 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, -v0
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, -s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, -s2
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
@@ -1006,35 +1180,65 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
-; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, |v0|
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, |v0|
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, |s2|
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, |s2|
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
@@ -1166,35 +1370,65 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0|
-; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2|
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, -|v0|
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, -|v0|
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, -|s2|
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, -|s2|
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) #0 {
entry:
@@ -1327,39 +1561,73 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, s2
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) #0 {
entry:
@@ -1492,39 +1760,73 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, |v0|
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, |v0|
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, |s2|
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, |s2|
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) #0 {
entry:
@@ -1666,39 +1968,73 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
-; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
-; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
-; GFX11-SDAG-NEXT: s_endpgm
-;
-; GFX11-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
-; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, s2
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) #0 {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
index 4bab6eaab6f7d..1faa3e37752f6 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
@@ -10,8 +10,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s
define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {
; SI-LABEL: fptrunc_f64_to_f32:
@@ -520,29 +522,53 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-SAFE-GISEL-NEXT: s_endpgm
;
-; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
-; GFX11-UNSAFE-SDAG: ; %bb.0:
-; GFX11-UNSAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
-; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
-; GFX11-UNSAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-UNSAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-UNSAFE-SDAG-NEXT: s_endpgm
-;
-; GFX11-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
-; GFX11-UNSAFE-GISEL: ; %bb.0:
-; GFX11-UNSAFE-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
-; GFX11-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX11-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-UNSAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-UNSAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-UNSAFE-GISEL-NEXT: s_endpgm
+; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-DAG-TRUE16: ; %bb.0:
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-UNSAFE-DAG-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-DAG-FAKE16: ; %bb.0:
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-UNSAFE-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-GISEL-TRUE16: ; %bb.0:
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-UNSAFE-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-GISEL-FAKE16: ; %bb.0:
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_endpgm
%result = fptrunc double %in to half
%result_i16 = bitcast half %result to i16
store i16 %result_i16, ptr addrspace(1) %out
More information about the llvm-commits
mailing list