[llvm] a51798e - [AMDGPU][True16][CodeGen] true16 codegen pat for fptrunc_round (#124044)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 30 15:31:56 PST 2025
Author: Brox Chen
Date: 2025-01-30T18:31:52-05:00
New Revision: a51798e3d68b9e04660757ff6e645c8ba46baaa9
URL: https://github.com/llvm/llvm-project/commit/a51798e3d68b9e04660757ff6e645c8ba46baaa9
DIFF: https://github.com/llvm/llvm-project/commit/a51798e3d68b9e04660757ff6e645c8ba46baaa9.diff
LOG: [AMDGPU][True16][CodeGen] true16 codegen pat for fptrunc_round (#124044)
true16 codegen pattern for fptrunc_round f32 to f16.
For mir test, split to preGFX11 and postGFX11. and add a true16 and a
fake16 test accordingly
Added:
llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus-fake16.mir
llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SIModeRegister.cpp
llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5af46989aca97b..bee4c47a23ba6b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -228,16 +228,39 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
// Pseudo instructions used for @llvm.fptrunc.round. The final codegen is done
// in the ModeRegister pass.
let Uses = [MODE, EXEC] in {
+let True16Predicate = NotHasTrue16BitInsts in
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round)>;
+let True16Predicate = UseFakeTrue16Insts in
+def FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VGPR_32:$src0, i32imm:$round)>;
+
+let True16Predicate = UseRealTrue16Insts in
+// The operands of these pseudos should match V_CVT_F16_F32_t16_e64
+def FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 : VPseudoInstSI <(outs VOPDstOperand_t16:$vdst),
+ (ins FP32InputMods:$src0_modifiers, VSrc_f32:$src0, Clamp0:$clamp, omod0:$omod, op_sel0:$op_sel, i32imm:$round)> {
+ let FPClamp = 1;
+ let ClampLo = 1;
+ let UseNamedOperandTable = 1;
+}
+
def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VReg_64:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]
+let True16Predicate = NotHasTrue16BitInsts in
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
+let True16Predicate = UseFakeTrue16Insts in
+def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
+ (FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $src0, (as_hw_round_mode $round))>;
+
+let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 SupportedRoundMode:$round))),
+ (FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 $src0_modifiers, $src0, (as_hw_round_mode $round))>;
+
def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 412e2f2fe45d1e..99aea52c184d6e 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -166,6 +166,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
unsigned Opcode = MI.getOpcode();
if (TII->usesFPDPRounding(MI) ||
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
+ Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||
+ Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||
Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
switch (Opcode) {
case AMDGPU::V_INTERP_P1LL_F16:
@@ -177,19 +179,19 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
unsigned Mode = MI.getOperand(2).getImm();
MI.removeOperand(2);
- // Replacing the pseudo by a real instruction in place
- if (TII->getSubtarget().hasTrue16BitInsts()) {
- MachineBasicBlock &MBB = *MI.getParent();
- MachineInstrBuilder B(*MBB.getParent(), MI);
- MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));
- MachineOperand Src0 = MI.getOperand(1);
- MI.removeOperand(1);
- B.addImm(0); // src0_modifiers
- B.add(Src0); // re-add src0 operand
- B.addImm(0); // clamp
- B.addImm(0); // omod
- } else
- MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+ return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+ }
+ case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {
+ unsigned Mode = MI.getOperand(2).getImm();
+ MI.removeOperand(2);
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));
+ return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+ }
+ case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {
+ unsigned Mode = MI.getOperand(6).getImm();
+ MI.removeOperand(6);
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus-fake16.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus-fake16.mir
new file mode 100644
index 00000000000000..8667934d70ff00
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus-fake16.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
+
+---
+name: ftrunc_tonearest
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_tonearest
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 0, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: ftrunc_upward
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_upward
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: ftrunc_downward
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_downward
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e32 $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr1, 2, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: ftrunc_towardzero
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_towardzero
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 3, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir
new file mode 100644
index 00000000000000..9b439af4bf7b4f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
+
+---
+name: ftrunc_tonearest
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_tonearest
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: ftrunc_upward
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_upward
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: ftrunc_downward
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_downward
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr0_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr0_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr1, 0, 0, 0, 2, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: ftrunc_towardzero
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_towardzero
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 3, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
index 67eb719fd2c0d9..a28f347603ab7c 100644
--- a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
@@ -1,7 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
---
name: ftrunc_tonearest
@@ -15,13 +14,6 @@ body: |
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; GFX11-LABEL: name: ftrunc_tonearest
- ; GFX11: liveins: $sgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 0, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -39,14 +31,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; GFX11-LABEL: name: ftrunc_upward
- ; GFX11: liveins: $sgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -64,14 +48,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr0 = V_CVT_F16_F32_e32 $vgpr1, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; GFX11-LABEL: name: ftrunc_downward
- ; GFX11: liveins: $sgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr1, 2, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -89,14 +65,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; GFX11-LABEL: name: ftrunc_towardzero
- ; GFX11: liveins: $sgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 3, implicit $mode, implicit $exec
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
index 4a1287c56ea8ee..0ad1c30b5b5a4f 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
@@ -3,7 +3,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX906 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
define i16 @shl_i16(i16 %x, i16 %y) {
; GFX8-LABEL: shl_i16:
@@ -24,11 +25,17 @@ define i16 @shl_i16(i16 %x, i16 %y) {
; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: shl_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: shl_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: shl_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshlrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = shl i16 %x, %y
ret i16 %res
}
@@ -52,11 +59,17 @@ define i16 @lshr_i16(i16 %x, i16 %y) {
; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: lshr_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: lshr_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: lshr_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = lshr i16 %x, %y
ret i16 %res
}
@@ -80,11 +93,17 @@ define i16 @ashr_i16(i16 %x, i16 %y) {
; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: ashr_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: ashr_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_ashrrev_i16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: ashr_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_ashrrev_i16 v0, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = ashr i16 %x, %y
ret i16 %res
}
@@ -108,11 +127,18 @@ define i16 @add_u16(i16 %x, i16 %y) {
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: add_u16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_add_nc_u16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_u16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_u16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = add i16 %x, %y
ret i16 %res
}
@@ -136,11 +162,18 @@ define i16 @sub_u16(i16 %x, i16 %y) {
; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: sub_u16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: sub_u16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: sub_u16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = sub i16 %x, %y
ret i16 %res
}
@@ -164,11 +197,18 @@ define i16 @mul_lo_u16(i16 %x, i16 %y) {
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: mul_lo_u16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_lo_u16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_lo_u16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = mul i16 %x, %y
ret i16 %res
}
@@ -192,11 +232,18 @@ define i16 @min_u16(i16 %x, i16 %y) {
; GFX10-NEXT: v_min_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: min_u16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_min_u16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: min_u16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: min_u16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_min_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp ule i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
ret i16 %res
@@ -221,11 +268,18 @@ define i16 @min_i16(i16 %x, i16 %y) {
; GFX10-NEXT: v_min_i16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: min_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_min_i16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: min_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: min_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_min_i16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp sle i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
ret i16 %res
@@ -250,11 +304,18 @@ define i16 @max_u16(i16 %x, i16 %y) {
; GFX10-NEXT: v_max_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: max_u16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_u16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: max_u16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: max_u16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_max_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp uge i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
ret i16 %res
@@ -279,11 +340,18 @@ define i16 @max_i16(i16 %x, i16 %y) {
; GFX10-NEXT: v_max_i16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: max_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_i16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: max_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: max_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_max_i16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp sge i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
ret i16 %res
@@ -309,12 +377,19 @@ define i32 @shl_i16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: shl_i16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: shl_i16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: shl_i16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshlrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = shl i16 %x, %y
%zext = zext i16 %res to i32
ret i32 %zext
@@ -340,12 +415,19 @@ define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: lshr_i16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: lshr_i16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: lshr_i16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = lshr i16 %x, %y
%zext = zext i16 %res to i32
ret i32 %zext
@@ -371,12 +453,19 @@ define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: ashr_i16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: ashr_i16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_ashrrev_i16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: ashr_i16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_ashrrev_i16 v0, v1, v0
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = ashr i16 %x, %y
%zext = zext i16 %res to i32
ret i32 %zext
@@ -402,12 +491,20 @@ define i32 @add_u16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: add_u16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_add_nc_u16 v0, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_u16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_u16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = add i16 %x, %y
%zext = zext i16 %res to i32
ret i32 %zext
@@ -433,12 +530,20 @@ define i32 @sub_u16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: sub_u16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: sub_u16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: sub_u16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = sub i16 %x, %y
%zext = zext i16 %res to i32
ret i32 %zext
@@ -464,12 +569,20 @@ define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: mul_lo_u16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_lo_u16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_lo_u16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = mul i16 %x, %y
%zext = zext i16 %res to i32
ret i32 %zext
@@ -495,12 +608,20 @@ define i32 @min_u16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: min_u16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_min_u16 v0, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: min_u16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: min_u16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_min_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp ule i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
%zext = zext i16 %res to i32
@@ -527,12 +648,20 @@ define i32 @min_i16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: min_i16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_min_i16 v0, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: min_i16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: min_i16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_min_i16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp sle i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
%zext = zext i16 %res to i32
@@ -559,12 +688,20 @@ define i32 @max_u16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: max_u16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_u16 v0, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: max_u16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: max_u16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_max_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp uge i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
%zext = zext i16 %res to i32
@@ -591,12 +728,20 @@ define i32 @max_i16_zext_i32(i16 %x, i16 %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: max_i16_zext_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_max_i16 v0, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: max_i16_zext_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: max_i16_zext_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_max_i16 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp sge i16 %x, %y
%res = select i1 %cmp, i16 %x, i16 %y
%zext = zext i16 %res to i32
@@ -623,12 +768,20 @@ define i32 @zext_fadd_f16(half %x, half %y) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: zext_fadd_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: zext_fadd_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: zext_fadd_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%add = fadd half %x, %y
%cast = bitcast half %add to i16
%zext = zext i16 %cast to i32
@@ -656,12 +809,21 @@ define i32 @zext_fma_f16(half %x, half %y, half %z) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: zext_fma_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_fmac_f16_e32 v2, v0, v1
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v2
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: zext_fma_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: zext_fma_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_fmac_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fma = call half @llvm.fma.f16(half %x, half %y, half %z)
%cast = bitcast half %fma to i16
%zext = zext i16 %cast to i32
@@ -689,12 +851,21 @@ define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: zext_div_fixup_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_div_fixup_f16 v0, v0, v1, v2
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: zext_div_fixup_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX11-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: zext_div_fixup_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_div_fixup_f16 v0, v0, v1, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%div.fixup = call half @llvm.amdgcn.div.fixup.f16(half %x, half %y, half %z)
%cast = bitcast half %div.fixup to i16
%zext = zext i16 %cast to i32
@@ -724,12 +895,19 @@ define i32 @zext_fptrunc_f16(float %x) {
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: zext_fptrunc_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: zext_fptrunc_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: zext_fptrunc_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%fptrunc = fptrunc float %x to half
%cast = bitcast half %fptrunc to i16
%zext = zext i16 %cast to i32
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
index 0f80327638a9cb..52d882590cbced 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
@@ -2,8 +2,9 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s
define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
@@ -20,11 +21,23 @@ define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
; GFX89-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret half %val
}
@@ -58,13 +71,29 @@ define <2 x half> @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict(<2 x flo
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX1011-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x half> %val
}
@@ -103,14 +132,33 @@ define <3 x half> @v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict(<3 x flo
; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX1011-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v2
+; GFX11-TRUE16-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x half> %val
}
@@ -181,12 +229,26 @@ define half @v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #
; GFX89-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
%neg.val = fneg half %val
ret half %neg.val
@@ -207,11 +269,23 @@ define half @v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict(float %arg) #
; GFX89-NEXT: v_cvt_f16_f32_e64 v0, -v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, -v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%neg.arg = fneg float %arg
%val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret half %val
@@ -255,11 +329,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi(float %arg,
; GFX89-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret void
}
@@ -316,14 +402,23 @@ define void @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi(<2 x flo
; GFX10-NEXT: global_store_dword v[2:3], v0, off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT: global_store_b32 v[2:3], v0, off
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-TRUE16-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
store <2 x half> %result, ptr addrspace(1) %ptr
ret void
@@ -344,11 +439,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg(float %
; GFX89-NEXT: v_cvt_f16_f32_e64 v0, -v0
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, -v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%neg.arg = fneg float %arg
%result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret void
@@ -369,11 +476,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
; GFX89-NEXT: v_cvt_f16_f32_e64 v0, |v0|
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, |v0|
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cvt_f16_f32_e64 v0, |v0|
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e64 v0.l, |v0|
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e64 v0, |v0|
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%abs.arg = call float @llvm.fabs.f32(float %arg) #0
%result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret void
More information about the llvm-commits
mailing list