[llvm] 00955a6 - AMDGPU/GlobalISel: Fix SALU mapping for v2s16 min/max
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 11:02:29 PST 2020
Author: Matt Arsenault
Date: 2020-02-21T14:02:16-05:00
New Revision: 00955a62e4333c7ca889043d6a9033cb8cbf800d
URL: https://github.com/llvm/llvm-project/commit/00955a62e4333c7ca889043d6a9033cb8cbf800d
DIFF: https://github.com/llvm/llvm-project/commit/00955a62e4333c7ca889043d6a9033cb8cbf800d.diff
LOG: AMDGPU/GlobalISel: Fix SALU mapping for v2s16 min/max
The legalizer helper functions are too awkward to use for the 3-5 step
legalization required here. The operation needs to be widened, scalarized,
and lowered, and we should avoid creating vector extends and truncates.
Do all of this manually and expand.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index d33fb546eaf4..de21581052f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1520,6 +1520,51 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
}
}
+static unsigned minMaxToExtend(unsigned Opc) { // Map an integer min/max opcode to the extension opcode matching its signedness.
+ switch (Opc) {
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ return TargetOpcode::G_SEXT; // Signed min/max: operands must be sign-extended.
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
+ return TargetOpcode::G_ZEXT; // Unsigned min/max: operands must be zero-extended.
+ default:
+ llvm_unreachable("not in integer min/max"); // Any other opcode is a caller bug.
+ }
+}
+
+// Emit a legalized extension of the <2 x s16> value Src into two s32 values, returned
+// as (low half, high half), avoiding any illegal vector extend or unmerge operations.
+static std::pair<Register, Register>
+unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
+ const LLT S32 = LLT::scalar(32);
+ auto Bitcast = B.buildBitcast(S32, Src); // View both 16-bit elements as a single s32.
+
+ if (ExtOpcode == TargetOpcode::G_SEXT) {
+ auto ExtLo = B.buildSExtInReg(S32, Bitcast, 16); // Sign-extend the low 16 bits in place.
+ auto ShiftHi = B.buildAShr(S32, Bitcast, B.buildConstant(S32, 16)); // Arithmetic shift brings the high half down and sign-fills.
+ return std::make_pair(ExtLo.getReg(0), ShiftHi.getReg(0));
+ }
+
+ auto ShiftHi = B.buildLShr(S32, Bitcast, B.buildConstant(S32, 16)); // Logical shift zero-fills; shared by the G_ZEXT and G_ANYEXT paths.
+ if (ExtOpcode == TargetOpcode::G_ZEXT) {
+ auto ExtLo = B.buildAnd(S32, Bitcast, B.buildConstant(S32, 0xffff)); // Mask off the high half to zero-extend the low half.
+ return std::make_pair(ExtLo.getReg(0), ShiftHi.getReg(0));
+ }
+
+ assert(ExtOpcode == TargetOpcode::G_ANYEXT);
+ return std::make_pair(Bitcast.getReg(0), ShiftHi.getReg(0)); // Any-extend: the low result is the raw bitcast, so its upper 16 bits are left as-is.
+}
+
+static MachineInstr *buildExpandedScalarMinMax(MachineIRBuilder &B, // Expand a scalar min/max as Dst = select(icmp(Pred, Src0, Src1), Src0, Src1).
+ CmpInst::Predicate Pred,
+ Register Dst, Register Src0,
+ Register Src1) {
+ const LLT CmpType = LLT::scalar(32); // The compare result is produced as an s32 value.
+ auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
+ return B.buildSelect(Dst, Cmp, Src0, Src1); // Returns the select; its operand 1 is the compare result register.
+}
+
// FIXME: Duplicated from LegalizerHelper, except changing the boolean type.
void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
MachineInstr &MI) const {
@@ -1528,12 +1573,10 @@ void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
Register Src1 = MI.getOperand(2).getReg();
const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
- LLT CmpType = LLT::scalar(32);
-
- auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
- B.buildSelect(Dst, Cmp, Src0, Src1);
+ MachineInstr *Sel = buildExpandedScalarMinMax(B, Pred, Dst, Src0, Src1);
- B.getMRI()->setRegBank(Cmp.getReg(0), AMDGPU::SGPRRegBank);
+ Register CmpReg = Sel->getOperand(1).getReg();
+ B.getMRI()->setRegBank(CmpReg, AMDGPU::SGPRRegBank);
MI.eraseFromParent();
}
@@ -2072,10 +2115,44 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Turn scalar min/max into a compare and select.
LLT Ty = MRI.getType(DstReg);
- LLT S32 = LLT::scalar(32);
- LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S16 = LLT::scalar(16);
+ const LLT V2S16 = LLT::vector(2, 16);
+
+ if (Ty == V2S16) {
+ ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
+ GISelObserverWrapper Observer(&ApplySALU);
+ B.setChangeObserver(Observer);
- if (Ty == S16) {
+ // Need to widen to s32, and expand as cmp + select, and avoid producing
+ // illegal vector extends or unmerges that would need further
+ // legalization.
+ //
+ // TODO: Should we just readfirstlane? That should probably be handled
+ // with a UniformVGPR register bank that wouldn't need special
+ // consideration here.
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
+ Register WideSrc0Lo, WideSrc0Hi;
+ Register WideSrc1Lo, WideSrc1Hi;
+
+ unsigned ExtendOp = minMaxToExtend(MI.getOpcode());
+
+ std::tie(WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32(B, Src0, ExtendOp);
+ std::tie(WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32(B, Src1, ExtendOp);
+
+ Register Lo = MRI.createGenericVirtualRegister(S32);
+ Register Hi = MRI.createGenericVirtualRegister(S32);
+ const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+ buildExpandedScalarMinMax(B, Pred, Lo, WideSrc0Lo, WideSrc1Lo);
+ buildExpandedScalarMinMax(B, Pred, Hi, WideSrc0Hi, WideSrc1Hi);
+
+ B.buildBuildVectorTrunc(Dst, {Lo, Hi});
+ MI.eraseFromParent();
+ } else if (Ty == S16) {
ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
GISelObserverWrapper Observer(&ApplySALU);
LegalizerHelper Helper(*MF, Observer, B);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
index dd3909cf1f1e..c12d209d9bda 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
---
name: smax_s32_ss
@@ -158,3 +158,93 @@ body: |
$vgpr0 = COPY %5
...
+
+---
+name: smax_v2s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: smax_v2s16_ss
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
+ ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
+ ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
+ ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_SMAX %0, %1
+ $sgpr0 = COPY %2
+...
+
+---
+name: smax_v2s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: smax_v2s16_sv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+ ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY2]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $vgpr0
+ %2:_(<2 x s16>) = G_SMAX %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: smax_v2s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: smax_v2s16_vs
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+ ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY2]]
+ ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $sgpr0
+ %2:_(<2 x s16>) = G_SMAX %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: smax_v2s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: smax_v2s16_vv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_SMAX %0, %1
+ $vgpr0 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
index 3b3ee55b4cc2..da19ddcb86df 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
+# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
---
name: smin_s32_ss
@@ -161,3 +161,93 @@ body: |
$vgpr0 = COPY %5
...
+
+---
+name: smin_v2s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: smin_v2s16_ss
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
+ ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
+ ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
+ ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_SMIN %0, %1
+ $sgpr0 = COPY %2
+...
+
+---
+name: smin_v2s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: smin_v2s16_sv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+ ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $vgpr0
+ %2:_(<2 x s16>) = G_SMIN %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: smin_v2s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: smin_v2s16_vs
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+ ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY2]]
+ ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $sgpr0
+ %2:_(<2 x s16>) = G_SMIN %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: smin_v2s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: smin_v2s16_vv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_SMIN %0, %1
+ $vgpr0 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
index 7b4baa727d0a..3f464f890d04 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
---
name: umax_s32_ss
@@ -161,3 +161,95 @@ body: |
$vgpr0 = COPY %5
...
+
+---
+name: umax_v2s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: umax_v2s16_ss
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+ ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]]
+ ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
+ ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[LSHR]](s32), [[LSHR1]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_UMAX %0, %1
+ $sgpr0 = COPY %2
+...
+
+---
+name: umax_v2s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: umax_v2s16_sv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+ ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY2]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $vgpr0
+ %2:_(<2 x s16>) = G_UMAX %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: umax_v2s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: umax_v2s16_vs
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+ ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY2]]
+ ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $sgpr0
+ %2:_(<2 x s16>) = G_UMAX %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: umax_v2s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: umax_v2s16_vv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_UMAX %0, %1
+ $vgpr0 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
index 06d7abfb051f..35072751a069 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
---
name: umin_s32_ss
@@ -165,3 +165,95 @@ body: |
$vgpr0 = COPY %5
...
+
+---
+name: umin_v2s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: umin_v2s16_ss
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+ ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]]
+ ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
+ ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[LSHR]](s32), [[LSHR1]]
+ ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_UMIN %0, %1
+ $sgpr0 = COPY %2
+...
+
+---
+name: umin_v2s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: umin_v2s16_sv
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+ ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $vgpr0
+ %2:_(<2 x s16>) = G_UMIN %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: umin_v2s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+
+ ; CHECK-LABEL: name: umin_v2s16_vs
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+ ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY2]]
+ ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $sgpr0
+ %2:_(<2 x s16>) = G_UMIN %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: umin_v2s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: umin_v2s16_vv
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_UMIN %0, %1
+ $vgpr0 = COPY %2
+...
More information about the llvm-commits
mailing list