[llvm] 15a5b3a - [AMDGPU][True16][CodeGen] gisel true16 for ICMP (#128913)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 13 09:03:21 PDT 2025
Author: Brox Chen
Date: 2025-03-13T12:03:17-04:00
New Revision: 15a5b3a192165bb1372c89bfc841067dd26170eb
URL: https://github.com/llvm/llvm-project/commit/15a5b3a192165bb1372c89bfc841067dd26170eb
DIFF: https://github.com/llvm/llvm-project/commit/15a5b3a192165bb1372c89bfc841067dd26170eb.diff
LOG: [AMDGPU][True16][CodeGen] gisel true16 for ICMP (#128913)
GlobalIsel true16 selection for ICMP
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 441fb5730a6d8..2ee82381c4ef0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1478,10 +1478,21 @@ bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
if (Opcode == -1)
return false;
- MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
- I.getOperand(0).getReg())
- .add(I.getOperand(2))
- .add(I.getOperand(3));
+ MachineInstrBuilder ICmp;
+ // t16 instructions
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers)) {
+ ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
+ .addImm(0)
+ .add(I.getOperand(2))
+ .addImm(0)
+ .add(I.getOperand(3))
+ .addImm(0); // op_sel
+ } else {
+ ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ }
+
RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
*TRI.getBoolRC(), *MRI);
bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
@@ -4596,6 +4607,7 @@ AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
}};
}
+// FIXME-TRUE16 remove when fake16 is removed
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
Register Src;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index d45bc31a12729..63aa8b4dc1b4f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -1,7 +1,10 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12-FAKE16 %s
---
@@ -29,13 +32,39 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_sv
- ; GFX11: liveins: $sgpr0, $vgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
@@ -70,13 +99,39 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_vs
- ; GFX11: liveins: $sgpr0, $vgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
@@ -111,13 +166,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -152,13 +235,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_ne_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -193,13 +304,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
;
- ; GFX11-LABEL: name: icmp_slt_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -234,13 +373,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
;
- ; GFX11-LABEL: name: icmp_sle_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -275,13 +442,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_ult_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -316,13 +511,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_ule_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_ule_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_ule_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_ule_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_ule_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
More information about the llvm-commits
mailing list