[llvm] 641906d - AMDGPU/GlobalISel: Fix constant bus restriction errors for med3

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 1 12:37:11 PST 2021


Author: Petar Avramovic
Date: 2021-12-01T21:36:37+01:00
New Revision: 641906da8de2f131a433ec26d7fd44ee74abfa5b

URL: https://github.com/llvm/llvm-project/commit/641906da8de2f131a433ec26d7fd44ee74abfa5b
DIFF: https://github.com/llvm/llvm-project/commit/641906da8de2f131a433ec26d7fd44ee74abfa5b.diff

LOG: AMDGPU/GlobalISel: Fix constant bus restriction errors for med3

Detected on targets older than gfx10 (e.g. gfx9) for constants that are
too large to be inlined (constants are sgpr by default).
The med3 combine expects regbankselect to map all operands of the
min/max being matched to vgpr. Constants, however, are mapped to sgpr
and reach the min/max through an sgpr-to-vgpr copy. The matchers look
through such copies and return the sgpr, which breaks the constant bus
restriction.
Build the med3 with all-vgpr operands: reuse the existing sgpr-to-vgpr
copy for each matched sgpr, and if no such copy exists (not expected)
build one.

Differential Revision: https://reviews.llvm.org/D114700
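
On subtargets before gfx10 a VALU instruction may read at most one sgpr or
literal constant through the constant bus (gfx10 allows two), which is why
the verifier only complains on gfx9 and older. A condensed, illustrative
MIR sketch of the pattern; the named vregs and the constant 65 mirror the
new test_non_inline_constant_i32 test rather than real combiner output:

  ; Before the fix: both constants stay in sgprs, so the selected
  ; v_med3 would need two constant-bus reads on gfx9.
  %k0:sgpr(s32) = G_CONSTANT i32 -12
  %k1:sgpr(s32) = G_CONSTANT i32 65
  %med:vgpr(s32) = G_AMDGPU_SMED3 %val(s32), %k0(s32), %k1(s32)

  ; After the fix: the combiner reuses (or builds) sgpr-to-vgpr copies
  ; and feeds the med3 only vgpr operands.
  %k0:sgpr(s32) = G_CONSTANT i32 -12
  %v0:vgpr(s32) = COPY %k0(s32)
  %k1:sgpr(s32) = G_CONSTANT i32 65
  %v1:vgpr(s32) = COPY %k1(s32)
  %med:vgpr(s32) = G_AMDGPU_SMED3 %val(s32), %v0(s32), %v1(s32)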

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index d7dc9ee4117b7..12b5830ef930e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -45,6 +45,7 @@ class AMDGPURegBankCombinerHelper {
         TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
 
   bool isVgprRegBank(Register Reg);
+  Register getAsVgpr(Register Reg);
 
   struct MinMaxMedOpc {
     unsigned Min, Max, Med;
@@ -69,6 +70,23 @@ bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
   return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
 }
 
+Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
+  if (isVgprRegBank(Reg))
+    return Reg;
+
+  // Search for existing copy of Reg to vgpr.
+  for (MachineInstr &Use : MRI.use_instructions(Reg)) {
+    Register Def = Use.getOperand(0).getReg();
+    if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
+      return Def;
+  }
+
+  // Copy Reg to vgpr.
+  Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
+  MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
+  return VgprReg;
+}
+
 AMDGPURegBankCombinerHelper::MinMaxMedOpc
 AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
   switch (Opc) {
@@ -134,7 +152,9 @@ void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
                                             Med3MatchInfo &MatchInfo) {
   B.setInstrAndDebugLoc(MI);
   B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
-               {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
+               {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
+                getAsVgpr(MatchInfo.Val2)},
+               MI.getFlags());
   MI.eraseFromParent();
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
index ec4755a6ea989..da576626e9954 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
@@ -1,4 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
 # RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
 
 ---
@@ -12,14 +13,17 @@ body: |
 
     ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 -12
@@ -44,14 +48,17 @@ body: |
 
     ; CHECK-LABEL: name: min_max_ValK0_K1_i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 -12
@@ -76,14 +83,17 @@ body: |
 
     ; CHECK-LABEL: name: test_min_K1max_ValK0__i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 -12
@@ -108,14 +118,17 @@ body: |
 
     ; CHECK-LABEL: name: test_min_K1max_K0Val__i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 -12
@@ -140,14 +153,17 @@ body: |
 
     ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 17
@@ -172,14 +188,17 @@ body: |
 
     ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 17
@@ -204,14 +223,17 @@ body: |
 
     ; CHECK-LABEL: name: test_max_K0min_ValK1__i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 17
@@ -236,14 +258,17 @@ body: |
 
     ; CHECK-LABEL: name: test_max_K0min_K1Val__i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 17
@@ -268,19 +293,20 @@ body: |
 
     ; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY]]
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]]
-    ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
-    ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(<2 x s16>) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %9:sgpr(s32) = G_CONSTANT i32 17
@@ -307,15 +333,16 @@ body: |
 
     ; CHECK-LABEL: name: test_uniform_min_max
     ; CHECK: liveins: $sgpr2
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
-    ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
-    ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
-    ; CHECK: $sgpr0 = COPY [[INT]](s32)
-    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+    ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+    ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
     %0:sgpr(s32) = COPY $sgpr2
     %3:sgpr(s32) = G_CONSTANT i32 -12
     %4:sgpr(s32) = G_SMAX %0, %3
@@ -326,3 +353,38 @@ body: |
     $sgpr0 = COPY %7(s32)
     SI_RETURN_TO_EPILOG implicit $sgpr0
 ...
+
+---
+name: test_non_inline_constant_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $vgpr0, $sgpr30_sgpr31
+
+    ; CHECK-LABEL: name: test_non_inline_constant_i32
+    ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr_64 = COPY $sgpr30_sgpr31
+    %2:sgpr(s32) = G_CONSTANT i32 -12
+    %7:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_SMAX %0, %7
+    %4:sgpr(s32) = G_CONSTANT i32 65
+    %8:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_SMIN %3, %8
+    $vgpr0 = COPY %5(s32)
+    %6:ccr_sgpr_64 = COPY %1
+    S_SETPC_B64_return %6, implicit $vgpr0
+...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
index cc5131b669994..8c5d796a60e06 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir
@@ -1,4 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
 # RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
 
 ---
@@ -12,14 +13,17 @@ body: |
 
     ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 12
@@ -44,14 +48,17 @@ body: |
 
     ; CHECK-LABEL: name: min_max_ValK0_K1_i32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 12
@@ -76,14 +83,17 @@ body: |
 
     ; CHECK-LABEL: name: test_min_K1max_ValK0__u32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 12
@@ -108,14 +118,17 @@ body: |
 
     ; CHECK-LABEL: name: test_min_K1max_K0Val__u32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 12
@@ -140,14 +153,17 @@ body: |
 
     ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 17
@@ -172,14 +188,17 @@ body: |
 
     ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 17
@@ -204,14 +223,17 @@ body: |
 
     ; CHECK-LABEL: name: test_max_K0min_ValK1__u32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 17
@@ -236,14 +258,17 @@ body: |
 
     ; CHECK-LABEL: name: test_max_K0min_K1Val__u32
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
-    ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %2:sgpr(s32) = G_CONSTANT i32 17
@@ -268,19 +293,20 @@ body: |
 
     ; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16
     ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
-    ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY]]
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-    ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]]
-    ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
-    ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-    ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+    ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
     %0:vgpr(<2 x s16>) = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr30_sgpr31
     %9:sgpr(s32) = G_CONSTANT i32 17
@@ -307,15 +333,16 @@ body: |
 
     ; CHECK-LABEL: name: test_uniform_min_max
     ; CHECK: liveins: $sgpr2
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
-    ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
-    ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
-    ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
-    ; CHECK: $sgpr0 = COPY [[INT]](s32)
-    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[C]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+    ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+    ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+    ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
     %0:sgpr(s32) = COPY $sgpr2
     %3:sgpr(s32) = G_CONSTANT i32 12
     %4:sgpr(s32) = G_UMAX %0, %3
@@ -327,3 +354,38 @@ body: |
     SI_RETURN_TO_EPILOG implicit $sgpr0
 
 ...
+
+---
+name: test_non_inline_constant_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $vgpr0, $sgpr30_sgpr31
+
+    ; CHECK-LABEL: name: test_non_inline_constant_i32
+    ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr_64 = COPY $sgpr30_sgpr31
+    %2:sgpr(s32) = G_CONSTANT i32 12
+    %7:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_UMAX %0, %7
+    %4:sgpr(s32) = G_CONSTANT i32 65
+    %8:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_UMIN %3, %8
+    $vgpr0 = COPY %5(s32)
+    %6:ccr_sgpr_64 = COPY %1
+    S_SETPC_B64_return %6, implicit $vgpr0
+...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
index b42ab8f729972..dca2c01783379 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
@@ -1,7 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
 
 define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
+; GFX9-LABEL: test_min_max_ValK0_K1_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_min_max_ValK0_K1_i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -14,6 +21,12 @@ define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
 }
 
 define i32 @min_max_ValK0_K1_i32(i32 %a) {
+; GFX9-LABEL: min_max_ValK0_K1_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: min_max_ValK0_K1_i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -26,6 +39,12 @@ define i32 @min_max_ValK0_K1_i32(i32 %a) {
 }
 
 define i32 @test_min_K1max_ValK0__i32(i32 %a) {
+; GFX9-LABEL: test_min_K1max_ValK0__i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_min_K1max_ValK0__i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -38,6 +57,12 @@ define i32 @test_min_K1max_ValK0__i32(i32 %a) {
 }
 
 define i32 @test_min_K1max_K0Val__i32(i32 %a) {
+; GFX9-LABEL: test_min_K1max_K0Val__i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_min_K1max_K0Val__i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -50,6 +75,12 @@ define i32 @test_min_K1max_K0Val__i32(i32 %a) {
 }
 
 define i32 @test_max_min_ValK1_K0_i32(i32 %a) {
+; GFX9-LABEL: test_max_min_ValK1_K0_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_min_ValK1_K0_i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -62,6 +93,12 @@ define i32 @test_max_min_ValK1_K0_i32(i32 %a) {
 }
 
 define i32 @test_max_min_K1Val_K0_i32(i32 %a) {
+; GFX9-LABEL: test_max_min_K1Val_K0_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_min_K1Val_K0_i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -74,6 +111,12 @@ define i32 @test_max_min_K1Val_K0_i32(i32 %a) {
 }
 
 define i32 @test_max_K0min_ValK1__i32(i32 %a) {
+; GFX9-LABEL: test_max_K0min_ValK1__i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_K0min_ValK1__i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -86,6 +129,12 @@ define i32 @test_max_K0min_ValK1__i32(i32 %a) {
 }
 
 define i32 @test_max_K0min_K1Val__i32(i32 %a) {
+; GFX9-LABEL: test_max_K0min_K1Val__i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_K0min_K1Val__i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -98,6 +147,13 @@ define i32 @test_max_K0min_K1Val__i32(i32 %a) {
 }
 
 define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
+; GFX9-LABEL: test_max_K0min_K1Val__v2i16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
+; GFX9-NEXT:    v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_K0min_K1Val__v2i16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -111,6 +167,12 @@ define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
 }
 
 define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
+; GFX9-LABEL: test_uniform_min_max:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_max_i32 s0, s2, -12
+; GFX9-NEXT:    s_min_i32 s0, s0, 17
+; GFX9-NEXT:    ; return to shader part epilog
+;
 ; GFX10-LABEL: test_uniform_min_max:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_max_i32 s0, s2, -12
@@ -121,6 +183,25 @@ define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
   ret i32 %smed
 }
 
+define i32 @test_non_inline_constant_i32(i32 %a) {
+; GFX9-LABEL: test_non_inline_constant_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x41
+; GFX9-NEXT:    v_med3_i32 v0, v0, -12, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_non_inline_constant_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_med3_i32 v0, v0, -12, 0x41
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
+  %smed = call i32 @llvm.smin.i32(i32 %smax, i32 65)
+  ret i32 %smed
+}
+
 declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
index 08c9ae6176207..de87e107f31ac 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
@@ -1,7 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
 
 define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
+; GFX9-LABEL: test_min_max_ValK0_K1_u32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_min_max_ValK0_K1_u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -14,6 +21,12 @@ define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
 }
 
 define i32 @min_max_ValK0_K1_i32(i32 %a) {
+; GFX9-LABEL: min_max_ValK0_K1_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: min_max_ValK0_K1_i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -26,6 +39,12 @@ define i32 @min_max_ValK0_K1_i32(i32 %a) {
 }
 
 define i32 @test_min_K1max_ValK0__u32(i32 %a) {
+; GFX9-LABEL: test_min_K1max_ValK0__u32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_min_K1max_ValK0__u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -38,6 +57,12 @@ define i32 @test_min_K1max_ValK0__u32(i32 %a) {
 }
 
 define i32 @test_min_K1max_K0Val__u32(i32 %a) {
+; GFX9-LABEL: test_min_K1max_K0Val__u32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_min_K1max_K0Val__u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -50,6 +75,12 @@ define i32 @test_min_K1max_K0Val__u32(i32 %a) {
 }
 
 define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
+; GFX9-LABEL: test_max_min_ValK1_K0_u32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_min_ValK1_K0_u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -62,6 +93,12 @@ define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
 }
 
 define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
+; GFX9-LABEL: test_max_min_K1Val_K0_u32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_min_K1Val_K0_u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -74,6 +111,12 @@ define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
 }
 
 define i32 @test_max_K0min_ValK1__u32(i32 %a) {
+; GFX9-LABEL: test_max_K0min_ValK1__u32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_K0min_ValK1__u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -86,6 +129,12 @@ define i32 @test_max_K0min_ValK1__u32(i32 %a) {
 }
 
 define i32 @test_max_K0min_K1Val__u32(i32 %a) {
+; GFX9-LABEL: test_max_K0min_K1Val__u32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_K0min_K1Val__u32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -98,6 +147,13 @@ define i32 @test_max_K0min_K1Val__u32(i32 %a) {
 }
 
 define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
+; GFX9-LABEL: test_max_K0min_K1Val__v2u16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
+; GFX9-NEXT:    v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -111,6 +167,12 @@ define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
 }
 
 define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
+; GFX9-LABEL: test_uniform_min_max:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_max_u32 s0, s2, 12
+; GFX9-NEXT:    s_min_u32 s0, s0, 17
+; GFX9-NEXT:    ; return to shader part epilog
+;
 ; GFX10-LABEL: test_uniform_min_max:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_max_u32 s0, s2, 12
@@ -121,6 +183,25 @@ define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
   ret i32 %umed
 }
 
+define i32 @test_non_inline_constant_u32(i32 %a) {
+; GFX9-LABEL: test_non_inline_constant_u32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x41
+; GFX9-NEXT:    v_med3_u32 v0, v0, 12, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_non_inline_constant_u32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 0x41
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
+  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 65)
+  ret i32 %umed
+}
+
 declare i32 @llvm.umin.i32(i32, i32)
 declare i32 @llvm.umax.i32(i32, i32)
 declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
