[llvm] [AMDGPU][True16][CodeGen] true16 codegen pat for fptrunc.f16 (PR #124044)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 07:24:04 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
Add a true16 codegen pattern for fptrunc.f16.
For the MIR test, split it into pre-GFX11 and post-GFX11 versions, and add a true16 test and a fake16 test accordingly.
---
Patch is 54.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124044.diff
7 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+23)
- (modified) llvm/lib/Target/AMDGPU/SIModeRegister.cpp (+17-12)
- (added) llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx10plus-fake16.mir (+70)
- (added) llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx10plus.mir (+70)
- (modified) llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir (-32)
- (modified) llvm/test/CodeGen/AMDGPU/preserve-hi16.ll (+313-135)
- (modified) llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll (+175-56)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 40a20fa9cb15ea..addf6a07f5ee6c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -228,16 +228,39 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
// Pseudo instructions used for @llvm.fptrunc.round. The final codegen is done
// in the ModeRegister pass.
let Uses = [MODE, EXEC] in {
+let True16Predicate = NotHasTrue16BitInsts in
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round)>;
+let True16Predicate = UseFakeTrue16Insts in
+def FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VGPR_32:$src0, i32imm:$round)>;
+
+let True16Predicate = UseRealTrue16Insts in
+// The operands of these pseudos should match V_CVT_F16_F32_t16_e64
+def FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 : VPseudoInstSI <(outs VOPDstOperand_t16:$vdst),
+ (ins FP32InputMods:$src0_modifiers, VSrc_f32:$src0, Clamp0:$clamp, omod0:$omod, op_sel0:$op_sel, i32imm:$round)> {
+ let FPClamp = 1;
+ let ClampLo = 1;
+ let UseNamedOperandTable = 1;
+}
+
def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VReg_64:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]
+let True16Predicate = NotHasTrue16BitInsts in
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
+let True16Predicate = UseFakeTrue16Insts in
+def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
+ (FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $src0, (as_hw_round_mode $round))>;
+
+let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <(f16 (fptrunc_round (f32 (VOP3OpSelMods f32:$src0, i32:$src0_modifiers)), (i32 SupportedRoundMode:$round))),
+ (FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 $src0_modifiers, $src0, (as_hw_round_mode $round))>;
+
def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 412e2f2fe45d1e..a19b06351a2d20 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -166,6 +166,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
unsigned Opcode = MI.getOpcode();
if (TII->usesFPDPRounding(MI) ||
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
+ Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||
+ Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||
Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
switch (Opcode) {
case AMDGPU::V_INTERP_P1LL_F16:
@@ -178,18 +180,21 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
unsigned Mode = MI.getOperand(2).getImm();
MI.removeOperand(2);
// Replacing the pseudo by a real instruction in place
- if (TII->getSubtarget().hasTrue16BitInsts()) {
- MachineBasicBlock &MBB = *MI.getParent();
- MachineInstrBuilder B(*MBB.getParent(), MI);
- MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));
- MachineOperand Src0 = MI.getOperand(1);
- MI.removeOperand(1);
- B.addImm(0); // src0_modifiers
- B.add(Src0); // re-add src0 operand
- B.addImm(0); // clamp
- B.addImm(0); // omod
- } else
- MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+ return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+ }
+ case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {
+ unsigned Mode = MI.getOperand(2).getImm();
+ MI.removeOperand(2);
+ // Replacing the pseudo by a real instruction in place
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));
+ return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+ }
+ case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {
+ unsigned Mode = MI.getOperand(6).getImm();
+ MI.removeOperand(6);
+ // Replacing the pseudo by a real instruction in place
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx10plus-fake16.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx10plus-fake16.mir
new file mode 100644
index 00000000000000..b95ead19ad2477
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx10plus-fake16.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
+
+---
+name: ftrunc_tonearest
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_tonearest
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 0, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: ftrunc_upward
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_upward
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: ftrunc_downward
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_downward
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e32 $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr1, 2, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: ftrunc_towardzero
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_towardzero
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 $vgpr0, 3, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx10plus.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx10plus.mir
new file mode 100644
index 00000000000000..4ac256332cb4f2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.gfx10plus.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
+
+---
+name: ftrunc_tonearest
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_tonearest
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: ftrunc_upward
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_upward
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: ftrunc_downward
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_downward
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr0_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr0_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr1, 0, 0, 0, 2, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: ftrunc_towardzero
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; GFX11-LABEL: name: ftrunc_towardzero
+ ; GFX11: liveins: $sgpr0
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+ ; GFX11-NEXT: $vgpr1_hi16 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr1_hi16 = FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 0, $vgpr0, 0, 0, 0, 3, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
index 67eb719fd2c0d9..a28f347603ab7c 100644
--- a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
@@ -1,7 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11
---
name: ftrunc_tonearest
@@ -15,13 +14,6 @@ body: |
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; GFX11-LABEL: name: ftrunc_tonearest
- ; GFX11: liveins: $sgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 0, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -39,14 +31,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; GFX11-LABEL: name: ftrunc_upward
- ; GFX11: liveins: $sgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 1, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -64,14 +48,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr0 = V_CVT_F16_F32_e32 $vgpr1, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; GFX11-LABEL: name: ftrunc_downward
- ; GFX11: liveins: $sgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr1, 2, implicit $mode, implicit $exec
S_ENDPGM 0
@@ -89,14 +65,6 @@ body: |
; CHECK-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; GFX11-LABEL: name: ftrunc_towardzero
- ; GFX11: liveins: $sgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 3, implicit $mode, implicit $exec
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
index 4a1287c56ea8ee..0ad1c30b5b5a4f 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
@@ -3,7 +3,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX906 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
define i16 @shl_i16(i16 %x, i16 %y) {
; GFX8-LABEL: shl_i16:
@@ -24,11 +25,17 @@ define i16 @shl_i16(i16 %x, i16 %y) {
; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: shl_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshlrev_b16 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: shl_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: shl_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshlrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = shl i16 %x, %y
ret i16 %res
}
@@ -52,11 +59,17 @@ define i16 @lshr_i16(i16 %x, i16 %y) {
; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: lshr_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshrrev_b16 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: lshr_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_lshrrev_b16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: lshr_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b16 v0, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = lshr i16 %x, %y
ret i16 %res
}
@@ -80,11 +93,17 @@ define i16 @ashr_i16(i16 %x, i16 %y) {
; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: ashr_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_ashrrev_i16 v0, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: ashr_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_ashrrev_i16 v0.l, v1.l, v0.l
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: ashr_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_ashrrev_i16 v0, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = ashr i16 %x, %y
ret i16 %res
}
@@ -108,11 +127,18 @@ define i16 @add_u16(i16 %x, i16 %y) {
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: add_u16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_add_nc_u16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_u16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_u16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = add i16 %x, %y
ret i16 %res
}
@@ -136,11 +162,18 @@ define i16 @sub_u16(i16 %x, i16 %y) {
; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: sub_u16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_sub_nc_u16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: sub_u16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: sub_u16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%res = sub i16 %x, %y
ret i16 %res
}
@@ -164,11 +197,18 @@ define i16 @mul_lo_u16(i16 %x, i16 %y) {
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: mul_lo_u16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_lo_u16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/124044
More information about the llvm-commits mailing list