[llvm] [AMDGPU][True16][CodeGen] true16 codegen pat for fptrunc_round (PR #124044)

Brox Chen via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 28 06:46:27 PST 2025


https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/124044

>From f254546d0cd0728c469d4724dc88d811767d7d67 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 22 Jan 2025 16:26:59 -0500
Subject: [PATCH] true16 codegen pat for fptrunc.f16

---
 llvm/lib/Target/AMDGPU/SIInstructions.td      |  23 +
 llvm/lib/Target/AMDGPU/SIModeRegister.cpp     |  28 +-
 ...mode-register-fptrunc.gfx11plus-fake16.mir |  73 +++
 .../mode-register-fptrunc.gfx11plus.mir       |  73 +++
 .../CodeGen/AMDGPU/mode-register-fptrunc.mir  |  32 --
 llvm/test/CodeGen/AMDGPU/preserve-hi16.ll     | 448 ++++++++++++------
 llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll    | 231 ++++++---
 7 files changed, 672 insertions(+), 236 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus-fake16.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 40a20fa9cb15ea..addf6a07f5ee6c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -228,16 +228,39 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
 // Pseudo instructions used for @llvm.fptrunc.round. The final codegen is done
 // in the ModeRegister pass.
 let Uses = [MODE, EXEC] in {
+let True16Predicate = NotHasTrue16BitInsts in
 def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
   (ins VGPR_32:$src0, i32imm:$round)>;
 
+let True16Predicate = UseFakeTrue16Insts in
+def FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 : VPseudoInstSI <(outs VGPR_32:$vdst),
+  (ins VGPR_32:$src0, i32imm:$round)>;
+
+let True16Predicate = UseRealTrue16Insts in
+// The operands of these pseudos should match V_CVT_F16_F32_t16_e64
+def FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 : VPseudoInstSI <(outs VOPDstOperand_t16:$vdst),
+  (ins FP32InputMods:$src0_modifiers, VSrc_f32:$src0, Clamp0:$clamp, omod0:$omod, op_sel0:$op_sel, i32imm:$round)> {
+   let FPClamp = 1;
+   let ClampLo = 1;
+   let UseNamedOperandTable = 1;
+}
+
 def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
   (ins VReg_64:$src0, i32imm:$round)>;
 } // End Uses = [MODE, EXEC]
 
+let True16Predicate = NotHasTrue16BitInsts in
 def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
      (FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
 
+let True16Predicate = UseFakeTrue16Insts in
+def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
+     (FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $src0, (as_hw_round_mode $round))>;
+
+let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 SupportedRoundMode:$round))),
+     (FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 $src0_modifiers, $src0, (as_hw_round_mode $round))>;
+
 def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
      (FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;
 
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 412e2f2fe45d1e..99aea52c184d6e 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -166,6 +166,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
   unsigned Opcode = MI.getOpcode();
   if (TII->usesFPDPRounding(MI) ||
       Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
+      Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||
+      Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||
       Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
     switch (Opcode) {
     case AMDGPU::V_INTERP_P1LL_F16:
@@ -177,19 +179,19 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
     case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
       unsigned Mode = MI.getOperand(2).getImm();
       MI.removeOperand(2);
-      // Replacing the pseudo by a real instruction in place
-      if (TII->getSubtarget().hasTrue16BitInsts()) {
-        MachineBasicBlock &MBB = *MI.getParent();
-        MachineInstrBuilder B(*MBB.getParent(), MI);
-        MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));
-        MachineOperand Src0 = MI.getOperand(1);
-        MI.removeOperand(1);
-        B.addImm(0); // src0_modifiers
-        B.add(Src0); // re-add src0 operand
-        B.addImm(0); // clamp
-        B.addImm(0); // omod
-      } else
-        MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+      return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+    }
+    case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {
+      unsigned Mode = MI.getOperand(2).getImm();
+      MI.removeOperand(2);
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));
+      return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+    }
+    case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {
+      unsigned Mode = MI.getOperand(6).getImm();
+      MI.removeOperand(6);
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
       return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
     }
     case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus-fake16.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus-fake16.mir
new file mode 100644
index 00000000000000..8667934d70ff00
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus-fake16.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass si-mode-register  %s -o - | FileCheck %s --check-prefixes=GFX11
+
+---
+name: ftrunc_tonearest
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GFX11-LABEL: name: ftrunc_tonearest
+    ; GFX11: liveins: $sgpr0
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 0, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name: ftrunc_upward
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GFX11-LABEL: name: ftrunc_upward
+    ; GFX11: liveins: $sgpr0
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
+    ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name: ftrunc_downward
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GFX11-LABEL: name: ftrunc_downward
+    ; GFX11: liveins: $sgpr0
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
+    ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e32 $vgpr1, implicit $mode, implicit $exec
+    ; GFX11-NEXT: S_ENDPGM 0
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr1, 2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name: ftrunc_towardzero
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GFX11-LABEL: name: ftrunc_towardzero
+    ; GFX11: liveins: $sgpr0
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+    ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 3, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir
new file mode 100644
index 00000000000000..9b439af4bf7b4f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx11plus.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass si-mode-register  %s -o - | FileCheck %s --check-prefixes=GFX11
+
+---
+name: ftrunc_tonearest
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GFX11-LABEL: name: ftrunc_tonearest
+    ; GFX11: liveins: $sgpr0
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name: ftrunc_upward
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GFX11-LABEL: name: ftrunc_upward
+    ; GFX11: liveins: $sgpr0
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
+    ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name: ftrunc_downward
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GFX11-LABEL: name: ftrunc_downward
+    ; GFX11: liveins: $sgpr0
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
+    ; GFX11-NEXT: $vgpr0_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: S_ENDPGM 0
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr0_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr1, 0, 0, 0, 2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name: ftrunc_towardzero
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; GFX11-LABEL: name: ftrunc_towardzero
+    ; GFX11: liveins: $sgpr0
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+    ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 3, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
index 67eb719fd2c0d9..a28f347603ab7c 100644
--- a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
@@ -1,7 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-mode-register  %s -o - | FileCheck %s --check-prefixes=CHECK
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-mode-register  %s -o - | FileCheck %s --check-prefixes=CHECK
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-mode-register  %s -o - | FileCheck %s --check-prefixes=GFX11
 
 ---
 name: ftrunc_tonearest
@@ -15,13 +14,6 @@ body: |
     ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
     ; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
     ; CHECK-NEXT: S_ENDPGM 0
-    ;
-    ; GFX11-LABEL: name: ftrunc_tonearest
-    ; GFX11: liveins: $sgpr0
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
-    ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
     $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 0, implicit $mode, implicit $exec
     S_ENDPGM 0
@@ -39,14 +31,6 @@ body: |
     ; CHECK-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
     ; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
     ; CHECK-NEXT: S_ENDPGM 0
-    ;
-    ; GFX11-LABEL: name: ftrunc_upward
-    ; GFX11: liveins: $sgpr0
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
-    ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
-    ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
     $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 1, implicit $mode, implicit $exec
     S_ENDPGM 0
@@ -64,14 +48,6 @@ body: |
     ; CHECK-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
     ; CHECK-NEXT: $vgpr0 = V_CVT_F16_F32_e32 $vgpr1, implicit $mode, implicit $exec
     ; CHECK-NEXT: S_ENDPGM 0
-    ;
-    ; GFX11-LABEL: name: ftrunc_downward
-    ; GFX11: liveins: $sgpr0
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
-    ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
-    ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0
     $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
     $vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr1, 2, implicit $mode, implicit $exec
     S_ENDPGM 0
@@ -89,14 +65,6 @@ body: |
     ; CHECK-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
     ; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
     ; CHECK-NEXT: S_ENDPGM 0
-    ;
-    ; GFX11-LABEL: name: ftrunc_towardzero
-    ; GFX11: liveins: $sgpr0
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
-    ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
-    ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
     $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 3, implicit $mode, implicit $exec
     S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
index 4a1287c56ea8ee..0ad1c30b5b5a4f 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
@@ -3,7 +3,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX900 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX906 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
 
 define i16 @shl_i16(i16 %x, i16 %y) {
 ; GFX8-LABEL: shl_i16:
@@ -24,11 +25,17 @@ define i16 @shl_i16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_lshlrev_b16 v0, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: shl_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b16 v0, v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: shl_i16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: shl_i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshlrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = shl i16 %x, %y
   ret i16 %res
 }
@@ -52,11 +59,17 @@ define i16 @lshr_i16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_lshrrev_b16 v0, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: lshr_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b16 v0, v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: lshr_i16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: lshr_i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = lshr i16 %x, %y
   ret i16 %res
 }
@@ -80,11 +93,17 @@ define i16 @ashr_i16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_ashrrev_i16 v0, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: ashr_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_ashrrev_i16 v0, v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: ashr_i16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_ashrrev_i16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: ashr_i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_ashrrev_i16 v0, v1, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = ashr i16 %x, %y
   ret i16 %res
 }
@@ -108,11 +127,18 @@ define i16 @add_u16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_u16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_add_nc_u16 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_u16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_u16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = add i16 %x, %y
   ret i16 %res
 }
@@ -136,11 +162,18 @@ define i16 @sub_u16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_sub_nc_u16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: sub_u16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_sub_nc_u16 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: sub_u16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_sub_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: sub_u16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_sub_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = sub i16 %x, %y
   ret i16 %res
 }
@@ -164,11 +197,18 @@ define i16 @mul_lo_u16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_lo_u16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_lo_u16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_lo_u16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mul_lo_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = mul i16 %x, %y
   ret i16 %res
 }
@@ -192,11 +232,18 @@ define i16 @min_u16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_min_u16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: min_u16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_u16 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: min_u16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_min_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: min_u16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp ule i16 %x, %y
   %res = select i1 %cmp, i16 %x, i16 %y
   ret i16 %res
@@ -221,11 +268,18 @@ define i16 @min_i16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_min_i16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: min_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_i16 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: min_i16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_min_i16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: min_i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_i16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp sle i16 %x, %y
   %res = select i1 %cmp, i16 %x, i16 %y
   ret i16 %res
@@ -250,11 +304,18 @@ define i16 @max_u16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_max_u16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: max_u16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_u16 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: max_u16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_max_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: max_u16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp uge i16 %x, %y
   %res = select i1 %cmp, i16 %x, i16 %y
   ret i16 %res
@@ -279,11 +340,18 @@ define i16 @max_i16(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_max_i16 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: max_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_i16 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: max_i16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_max_i16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: max_i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_i16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp sge i16 %x, %y
   %res = select i1 %cmp, i16 %x, i16 %y
   ret i16 %res
@@ -309,12 +377,19 @@ define i32 @shl_i16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: shl_i16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b16 v0, v1, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: shl_i16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshlrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: shl_i16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshlrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = shl i16 %x, %y
   %zext = zext i16 %res to i32
   ret i32 %zext
@@ -340,12 +415,19 @@ define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: lshr_i16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b16 v0, v1, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: lshr_i16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: lshr_i16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = lshr i16 %x, %y
   %zext = zext i16 %res to i32
   ret i32 %zext
@@ -371,12 +453,19 @@ define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: ashr_i16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_ashrrev_i16 v0, v1, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: ashr_i16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_ashrrev_i16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: ashr_i16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_ashrrev_i16 v0, v1, v0
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = ashr i16 %x, %y
   %zext = zext i16 %res to i32
   ret i32 %zext
@@ -402,12 +491,20 @@ define i32 @add_u16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_u16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_add_nc_u16 v0, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_u16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_u16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = add i16 %x, %y
   %zext = zext i16 %res to i32
   ret i32 %zext
@@ -433,12 +530,20 @@ define i32 @sub_u16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: sub_u16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_sub_nc_u16 v0, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: sub_u16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_sub_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: sub_u16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_sub_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = sub i16 %x, %y
   %zext = zext i16 %res to i32
   ret i32 %zext
@@ -464,12 +569,20 @@ define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_lo_u16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_lo_u16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_lo_u16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mul_lo_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %res = mul i16 %x, %y
   %zext = zext i16 %res to i32
   ret i32 %zext
@@ -495,12 +608,20 @@ define i32 @min_u16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: min_u16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_u16 v0, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: min_u16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_min_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: min_u16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp ule i16 %x, %y
   %res = select i1 %cmp, i16 %x, i16 %y
   %zext = zext i16 %res to i32
@@ -527,12 +648,20 @@ define i32 @min_i16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: min_i16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_i16 v0, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: min_i16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_min_i16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: min_i16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_i16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp sle i16 %x, %y
   %res = select i1 %cmp, i16 %x, i16 %y
   %zext = zext i16 %res to i32
@@ -559,12 +688,20 @@ define i32 @max_u16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: max_u16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_u16 v0, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: max_u16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_max_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: max_u16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp uge i16 %x, %y
   %res = select i1 %cmp, i16 %x, i16 %y
   %zext = zext i16 %res to i32
@@ -591,12 +728,20 @@ define i32 @max_i16_zext_i32(i16 %x, i16 %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: max_i16_zext_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_i16 v0, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: max_i16_zext_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_max_i16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: max_i16_zext_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_i16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp sge i16 %x, %y
   %res = select i1 %cmp, i16 %x, i16 %y
   %zext = zext i16 %res to i32
@@ -623,12 +768,20 @@ define i32 @zext_fadd_f16(half %x, half %y) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: zext_fadd_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: zext_fadd_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: zext_fadd_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %add = fadd half %x, %y
   %cast = bitcast half %add to i16
   %zext = zext i16 %cast to i32
@@ -656,12 +809,21 @@ define i32 @zext_fma_f16(half %x, half %y, half %z) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: zext_fma_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fmac_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: zext_fma_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX11-TRUE16-NEXT:    v_fma_f16 v0.l, v0.l, v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: zext_fma_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_fmac_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
   %cast = bitcast half %fma to i16
   %zext = zext i16 %cast to i32
@@ -689,12 +851,21 @@ define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: zext_div_fixup_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: zext_div_fixup_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX11-TRUE16-NEXT:    v_div_fixup_f16 v0.l, v0.l, v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: zext_div_fixup_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %div.fixup = call half @llvm.amdgcn.div.fixup.f16(half %x, half %y, half %z)
   %cast = bitcast half %div.fixup to i16
   %zext = zext i16 %cast to i32
@@ -724,12 +895,19 @@ define i32 @zext_fptrunc_f16(float %x) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: zext_fptrunc_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: zext_fptrunc_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: zext_fptrunc_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %fptrunc = fptrunc float %x to half
   %cast = bitcast half %fptrunc to i16
   %zext = zext i16 %cast to i32
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
index 0f80327638a9cb..52d882590cbced 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
@@ -2,8 +2,9 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11-FAKE16 %s
 
 define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
 ; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
@@ -20,11 +21,23 @@ define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
 ; GFX89-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX89-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
-; GFX1011:       ; %bb.0:
-; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
 }
@@ -58,13 +71,29 @@ define <2 x half> @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict(<2 x flo
 ; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1011-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
-; GFX1011:       ; %bb.0:
-; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX1011-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret <2 x half> %val
 }
@@ -103,14 +132,33 @@ define <3 x half> @v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict(<3 x flo
 ; GFX9-NEXT:    v_perm_b32 v0, v3, v0, s4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1011-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
-; GFX1011:       ; %bb.0:
-; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX1011-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1011-NEXT:    v_cvt_f16_f32_e32 v1, v2
-; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v1.l, v2
+; GFX11-TRUE16-NEXT:    v_perm_b32 v0, v3, v0, 0x5040100
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret <3 x half> %val
 }
@@ -181,12 +229,26 @@ define half @v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #
 ; GFX89-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
 ; GFX89-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1011-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
-; GFX1011:       ; %bb.0:
-; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v0.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   %neg.val = fneg half %val
   ret half %neg.val
@@ -207,11 +269,23 @@ define half @v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict(float %arg) #
 ; GFX89-NEXT:    v_cvt_f16_f32_e64 v0, -v0
 ; GFX89-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1011-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
-; GFX1011:       ; %bb.0:
-; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT:    v_cvt_f16_f32_e64 v0, -v0
-; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cvt_f16_f32_e64 v0, -v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e64 v0.l, -v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e64 v0, -v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %neg.arg = fneg float %arg
   %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
@@ -255,11 +329,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi(float %arg,
 ; GFX89-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX89-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
-; GFX1011:       ; %bb.0:
-; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret void
 }
@@ -316,14 +402,23 @@ define void @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi(<2 x flo
 ; GFX10-NEXT:    global_store_dword v[2:3], v0, off
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    global_store_b32 v[2:3], v0, off
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v1.l, v1
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-TRUE16-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   store <2 x half> %result, ptr addrspace(1) %ptr
   ret void
@@ -344,11 +439,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg(float %
 ; GFX89-NEXT:    v_cvt_f16_f32_e64 v0, -v0
 ; GFX89-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
-; GFX1011:       ; %bb.0:
-; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT:    v_cvt_f16_f32_e64 v0, -v0
-; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cvt_f16_f32_e64 v0, -v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e64 v0.l, -v0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e64 v0, -v0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %neg.arg = fneg float %arg
   %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret void
@@ -369,11 +476,23 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
 ; GFX89-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
 ; GFX89-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
-; GFX1011:       ; %bb.0:
-; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
-; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e64 v0.l, |v0|
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %abs.arg = call float @llvm.fabs.f32(float %arg) #0
   %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret void



More information about the llvm-commits mailing list