[llvm] [AMDGPU][True16][CodeGen] gisel true16 for ICMP (PR #128913)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 13 09:03:07 PDT 2025
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/128913
>From 9fea04e53971287e53ec219cca78dcf755373cc6 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Fri, 28 Feb 2025 14:14:35 -0500
Subject: [PATCH 1/2] gisel update for icmp
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 16 +-
.../GlobalISel/inst-select-icmp.s16.mir | 337 +++++++++++++++---
2 files changed, 292 insertions(+), 61 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 441fb5730a6d8..2d324973315bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1478,10 +1478,17 @@ bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
if (Opcode == -1)
return false;
- MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
- I.getOperand(0).getReg())
- .add(I.getOperand(2))
- .add(I.getOperand(3));
+ MachineInstrBuilder ICmp =
+ BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg());
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers))
+ ICmp.addImm(0);
+ ICmp.add(I.getOperand(2));
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers))
+ ICmp.addImm(0);
+ ICmp.add(I.getOperand(3));
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel))
+ ICmp.addImm(0); // op_sel
+
RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
*TRI.getBoolRC(), *MRI);
bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
@@ -4596,6 +4603,7 @@ AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
}};
}
+// FIXME-TRUE16 remove when fake16 is removed
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
Register Src;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index d45bc31a12729..63aa8b4dc1b4f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -1,7 +1,10 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12-FAKE16 %s
---
@@ -29,13 +32,39 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_sv
- ; GFX11: liveins: $sgpr0, $vgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
@@ -70,13 +99,39 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_vs
- ; GFX11: liveins: $sgpr0, $vgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
@@ -111,13 +166,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -152,13 +235,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_ne_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -193,13 +304,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
;
- ; GFX11-LABEL: name: icmp_slt_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -234,13 +373,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
;
- ; GFX11-LABEL: name: icmp_sle_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -275,13 +442,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_ult_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -316,13 +511,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_ule_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_ule_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_ule_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_ule_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_ule_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
>From e5dba6a586318c7cdebcb8a2a381d9ce8e19f1de Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 3 Mar 2025 16:42:44 -0500
Subject: [PATCH 2/2] address comment
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 24 +++++++++++--------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 2d324973315bc..2ee82381c4ef0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1478,16 +1478,20 @@ bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
if (Opcode == -1)
return false;
- MachineInstrBuilder ICmp =
- BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg());
- if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers))
- ICmp.addImm(0);
- ICmp.add(I.getOperand(2));
- if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers))
- ICmp.addImm(0);
- ICmp.add(I.getOperand(3));
- if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel))
- ICmp.addImm(0); // op_sel
+ MachineInstrBuilder ICmp;
+ // t16 instructions
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers)) {
+ ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
+ .addImm(0)
+ .add(I.getOperand(2))
+ .addImm(0)
+ .add(I.getOperand(3))
+ .addImm(0); // op_sel
+ } else {
+ ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ }
RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
*TRI.getBoolRC(), *MRI);
More information about the llvm-commits mailing list