[llvm] 00955a6 - AMDGPU/GlobalISel: Fix SALU mapping for v2s16 min/max

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 21 11:02:29 PST 2020


Author: Matt Arsenault
Date: 2020-02-21T14:02:16-05:00
New Revision: 00955a62e4333c7ca889043d6a9033cb8cbf800d

URL: https://github.com/llvm/llvm-project/commit/00955a62e4333c7ca889043d6a9033cb8cbf800d
DIFF: https://github.com/llvm/llvm-project/commit/00955a62e4333c7ca889043d6a9033cb8cbf800d.diff

LOG: AMDGPU/GlobalISel: Fix SALU mapping for v2s16 min/max

The legalizer helper functions are unusably awkward to perform the 3-5
part legalization. This needs to be widened, scalarized, lowered, and
we should avoid creating vector extends and truncates. Manually do all
of this and expand.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index d33fb546eaf4..de21581052f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1520,6 +1520,51 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
   }
 }
 
+static unsigned minMaxToExtend(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
+    return TargetOpcode::G_SEXT;
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX:
+    return TargetOpcode::G_ZEXT;
+  default:
+    llvm_unreachable("not in integer min/max");
+  }
+}
+
+// Emit a legalized extension from <2 x s16> to 2 32-bit components, avoiding
+// any illegal vector extend or unmerge operations.
+static std::pair<Register, Register>
+unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
+  const LLT S32 = LLT::scalar(32);
+  auto Bitcast = B.buildBitcast(S32, Src);
+
+  if (ExtOpcode == TargetOpcode::G_SEXT) {
+    auto ExtLo = B.buildSExtInReg(S32, Bitcast, 16);
+    auto ShiftHi = B.buildAShr(S32, Bitcast, B.buildConstant(S32, 16));
+    return std::make_pair(ExtLo.getReg(0), ShiftHi.getReg(0));
+  }
+
+  auto ShiftHi = B.buildLShr(S32, Bitcast, B.buildConstant(S32, 16));
+  if (ExtOpcode == TargetOpcode::G_ZEXT) {
+    auto ExtLo = B.buildAnd(S32, Bitcast, B.buildConstant(S32, 0xffff));
+    return std::make_pair(ExtLo.getReg(0), ShiftHi.getReg(0));
+  }
+
+  assert(ExtOpcode == TargetOpcode::G_ANYEXT);
+  return std::make_pair(Bitcast.getReg(0), ShiftHi.getReg(0));
+}
+
+static MachineInstr *buildExpandedScalarMinMax(MachineIRBuilder &B,
+                                               CmpInst::Predicate Pred,
+                                               Register Dst, Register Src0,
+                                               Register Src1) {
+  const LLT CmpType = LLT::scalar(32);
+  auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
+  return B.buildSelect(Dst, Cmp, Src0, Src1);
+}
+
 // FIXME: Duplicated from LegalizerHelper, except changing the boolean type.
 void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
                                                MachineInstr &MI) const {
@@ -1528,12 +1573,10 @@ void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
   Register Src1 = MI.getOperand(2).getReg();
 
   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
-  LLT CmpType = LLT::scalar(32);
-
-  auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
-  B.buildSelect(Dst, Cmp, Src0, Src1);
+  MachineInstr *Sel = buildExpandedScalarMinMax(B, Pred, Dst, Src0, Src1);
 
-  B.getMRI()->setRegBank(Cmp.getReg(0), AMDGPU::SGPRRegBank);
+  Register CmpReg = Sel->getOperand(1).getReg();
+  B.getMRI()->setRegBank(CmpReg, AMDGPU::SGPRRegBank);
   MI.eraseFromParent();
 }
 
@@ -2072,10 +2115,44 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
 
     // Turn scalar min/max into a compare and select.
     LLT Ty = MRI.getType(DstReg);
-    LLT S32 = LLT::scalar(32);
-    LLT S16 = LLT::scalar(16);
+    const LLT S32 = LLT::scalar(32);
+    const LLT S16 = LLT::scalar(16);
+    const LLT V2S16 = LLT::vector(2, 16);
+
+    if (Ty == V2S16) {
+      ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
+      GISelObserverWrapper Observer(&ApplySALU);
+      B.setChangeObserver(Observer);
 
-    if (Ty == S16) {
+      // Need to widen to s32, and expand as cmp + select, and avoid producing
+      // illegal vector extends or unmerges that would need further
+      // legalization.
+      //
+      // TODO: Should we just readfirstlane? That should probably be handled
+      // with a UniformVGPR register bank that wouldn't need special
+      // consideration here.
+
+      Register Dst = MI.getOperand(0).getReg();
+      Register Src0 = MI.getOperand(1).getReg();
+      Register Src1 = MI.getOperand(2).getReg();
+
+      Register WideSrc0Lo, WideSrc0Hi;
+      Register WideSrc1Lo, WideSrc1Hi;
+
+      unsigned ExtendOp = minMaxToExtend(MI.getOpcode());
+
+      std::tie(WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32(B, Src0, ExtendOp);
+      std::tie(WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32(B, Src1, ExtendOp);
+
+      Register Lo = MRI.createGenericVirtualRegister(S32);
+      Register Hi = MRI.createGenericVirtualRegister(S32);
+      const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+      buildExpandedScalarMinMax(B, Pred, Lo, WideSrc0Lo, WideSrc1Lo);
+      buildExpandedScalarMinMax(B, Pred, Hi, WideSrc0Hi, WideSrc1Hi);
+
+      B.buildBuildVectorTrunc(Dst, {Lo, Hi});
+      MI.eraseFromParent();
+    } else if (Ty == S16) {
       ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
       GISelObserverWrapper Observer(&ApplySALU);
       LegalizerHelper Helper(*MF, Observer, B);

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
index dd3909cf1f1e..c12d209d9bda 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s  | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s  | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s  | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s  | FileCheck %s
 
 ---
 name: smax_s32_ss
@@ -158,3 +158,93 @@ body: |
     $vgpr0 = COPY %5
 
 ...
+
+---
+name: smax_v2s16_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: smax_v2s16_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
+    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
+    ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
+    ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+    ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $sgpr1
+    %2:_(<2 x s16>) = G_SMAX %0, %1
+    $sgpr0 = COPY %2
+...
+
+---
+name: smax_v2s16_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: smax_v2s16_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+    ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY2]], [[COPY1]]
+    ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $vgpr0
+    %2:_(<2 x s16>) = G_SMAX %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: smax_v2s16_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: smax_v2s16_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+    ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY2]]
+    ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $sgpr0
+    %2:_(<2 x s16>) = G_SMAX %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: smax_v2s16_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: smax_v2s16_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]]
+    ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_SMAX %0, %1
+    $vgpr0 = COPY %2
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
index 3b3ee55b4cc2..da19ddcb86df 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s  | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s  | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s  | FileCheck %s
+# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s  | FileCheck %s
 
 ---
 name: smin_s32_ss
@@ -161,3 +161,93 @@ body: |
     $vgpr0 = COPY %5
 
 ...
+
+---
+name: smin_v2s16_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: smin_v2s16_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
+    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
+    ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
+    ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+    ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $sgpr1
+    %2:_(<2 x s16>) = G_SMIN %0, %1
+    $sgpr0 = COPY %2
+...
+
+---
+name: smin_v2s16_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: smin_v2s16_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+    ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY1]]
+    ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $vgpr0
+    %2:_(<2 x s16>) = G_SMIN %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: smin_v2s16_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: smin_v2s16_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+    ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY2]]
+    ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $sgpr0
+    %2:_(<2 x s16>) = G_SMIN %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: smin_v2s16_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: smin_v2s16_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]]
+    ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_SMIN %0, %1
+    $vgpr0 = COPY %2
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
index 7b4baa727d0a..3f464f890d04 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s  | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s  | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s  | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s  | FileCheck %s
 
 ---
 name: umax_s32_ss
@@ -161,3 +161,95 @@ body: |
     $vgpr0 = COPY %5
 
 ...
+
+---
+name: umax_v2s16_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: umax_v2s16_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]]
+    ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[LSHR]](s32), [[LSHR1]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
+    ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+    ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $sgpr1
+    %2:_(<2 x s16>) = G_UMAX %0, %1
+    $sgpr0 = COPY %2
+...
+
+---
+name: umax_v2s16_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: umax_v2s16_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+    ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY2]], [[COPY1]]
+    ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $vgpr0
+    %2:_(<2 x s16>) = G_UMAX %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: umax_v2s16_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: umax_v2s16_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+    ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY2]]
+    ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $sgpr0
+    %2:_(<2 x s16>) = G_UMAX %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: umax_v2s16_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: umax_v2s16_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]]
+    ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_UMAX %0, %1
+    $vgpr0 = COPY %2
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
index 06d7abfb051f..35072751a069 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s  | FileCheck %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s  | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s  | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s  | FileCheck %s
 
 ---
 name: umin_s32_ss
@@ -165,3 +165,95 @@ body: |
     $vgpr0 = COPY %5
 
 ...
+
+---
+name: umin_v2s16_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: umin_v2s16_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]]
+    ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
+    ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]]
+    ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[LSHR]](s32), [[LSHR1]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
+    ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+    ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $sgpr1
+    %2:_(<2 x s16>) = G_UMIN %0, %1
+    $sgpr0 = COPY %2
+...
+
+---
+name: umin_v2s16_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: umin_v2s16_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+    ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY1]]
+    ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $vgpr0
+    %2:_(<2 x s16>) = G_UMIN %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: umin_v2s16_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: umin_v2s16_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+    ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY2]]
+    ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $sgpr0
+    %2:_(<2 x s16>) = G_UMIN %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: umin_v2s16_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: umin_v2s16_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]]
+    ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_UMIN %0, %1
+    $vgpr0 = COPY %2
+...


        


More information about the llvm-commits mailing list