[llvm] 167c00e - [AMDGPU][GlobalISel] Add register bank legalization for G_SMIN/G_SMAX/G_UMIN/G_UMAX (#159821)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 10 05:34:19 PDT 2025
Author: Syadus Sefat
Date: 2025-10-10T14:34:15+02:00
New Revision: 167c00e4a4b44f2c0b7bd278526f1017fc94ae41
URL: https://github.com/llvm/llvm-project/commit/167c00e4a4b44f2c0b7bd278526f1017fc94ae41
DIFF: https://github.com/llvm/llvm-project/commit/167c00e4a4b44f2c0b7bd278526f1017fc94ae41.diff
LOG: [AMDGPU][GlobalISel] Add register bank legalization for G_SMIN/G_SMAX/G_UMIN/G_UMAX (#159821)
[AMDGPU][GlobalISel] Add register bank legalization for
G_SMIN/G_SMAX/G_UMIN/G_UMAX
This patch adds register bank legalization support for min/max
operations in the AMDGPU GlobalISel pipeline.
- Add support S16, S32, and V2S16 types
- For V2S16 uniform operations implements UnpackMinMax lowering
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll
llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll
llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll
llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 73b2660727342..540756653dd22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -468,6 +468,38 @@ void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
MI.eraseFromParent();
}
+void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
+ Register Lo, Hi;
+ switch (MI.getOpcode()) {
+ case AMDGPU::G_SMIN:
+ case AMDGPU::G_SMAX: {
+ // For signed operations, use sign extension
+ auto [Val0_Lo, Val0_Hi] = unpackSExt(MI.getOperand(1).getReg());
+ auto [Val1_Lo, Val1_Hi] = unpackSExt(MI.getOperand(2).getReg());
+ Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
+ .getReg(0);
+ Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
+ .getReg(0);
+ break;
+ }
+ case AMDGPU::G_UMIN:
+ case AMDGPU::G_UMAX: {
+ // For unsigned operations, use zero extension
+ auto [Val0_Lo, Val0_Hi] = unpackZExt(MI.getOperand(1).getReg());
+ auto [Val1_Lo, Val1_Hi] = unpackZExt(MI.getOperand(2).getReg());
+ Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
+ .getReg(0);
+ Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
+ .getReg(0);
+ break;
+ }
+ default:
+ llvm_unreachable("Unpack min/max lowering not implemented");
+ }
+ B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
+ MI.eraseFromParent();
+}
+
static bool isSignedBFE(MachineInstr &MI) {
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -654,6 +686,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
}
case UnpackBitShift:
return lowerUnpackBitShift(MI);
+ case UnpackMinMax:
+ return lowerUnpackMinMax(MI);
case Ext32To64: {
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
MachineInstrBuilder Hi;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index 7affe5ab3da7f..d937815bf4714 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -123,6 +123,7 @@ class RegBankLegalizeHelper {
void lowerSplitTo32(MachineInstr &MI);
void lowerSplitTo32Select(MachineInstr &MI);
void lowerSplitTo32SExtInReg(MachineInstr &MI);
+ void lowerUnpackMinMax(MachineInstr &MI);
};
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index f413bbcecb526..7392f4b10229a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -522,6 +522,22 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
+ addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
+ .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
+ .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
+
+ addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
+ .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
+ .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
+
// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
// and G_FREEZE here, rest is trivially regbankselected earlier
addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index d0c69105356b8..93e0efda77fdd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -212,6 +212,7 @@ enum LoweringMethodID {
VccExtToSel,
UniExtToSel,
UnpackBitShift,
+ UnpackMinMax,
S_BFE,
V_BFE,
VgprToVccCopy,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
index eee553e4e872e..a7023d0ecb507 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
---
name: smax_s32_ss
@@ -188,8 +187,7 @@ body: |
; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]]
; CHECK-NEXT: [[SMAX1:%[0-9]+]]:sgpr(s32) = G_SMAX [[ASHR]], [[ASHR1]]
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMAX]](s32), [[SMAX1]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
index ef60aa81e4923..9dd5f45d26077 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
---
name: smin_s32_ss
@@ -191,8 +190,7 @@ body: |
; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]]
; CHECK-NEXT: [[SMIN1:%[0-9]+]]:sgpr(s32) = G_SMIN [[ASHR]], [[ASHR1]]
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMIN]](s32), [[SMIN1]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
index 36a38aac1ccaa..59d7dcea9f2d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
---
name: umax_s32_ss
@@ -186,15 +185,13 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[AND]], [[AND1]]
; CHECK-NEXT: [[UMAX1:%[0-9]+]]:sgpr(s32) = G_UMAX [[LSHR]], [[LSHR1]]
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMAX]](s32), [[UMAX1]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
index bb232b5e07651..fdb05f60e48e0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
---
name: umin_s32_ss
@@ -190,15 +189,13 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[AND]], [[AND1]]
; CHECK-NEXT: [[UMIN1:%[0-9]+]]:sgpr(s32) = G_UMIN [[LSHR]], [[LSHR1]]
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMIN]](s32), [[UMIN1]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
index ac1e11b0f3cd8..dfa613c69733b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
; GFX89-LABEL: test_min_max_ValK0_K1_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
index 2b54123f01ad6..f5068f547a608 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
; GFX89-LABEL: test_min_max_ValK0_K1_u32:
diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index 6b5bae06e5ef6..c9b94e096fde1 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -6,12 +6,12 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=SDAG-GFX12,SDAG-GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=SDAG-GFX12,SDAG-GFX12-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -global-isel < %s | FileCheck -check-prefixes=GISEL-VI %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX9 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -mattr=+real-true16 -global-isel < %s | FileCheck -check-prefixes=GFX11,GISEL-GFX11,GISEL-GFX11-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -mattr=-real-true16 -global-isel < %s | FileCheck -check-prefixes=GFX11,GISEL-GFX11,GISEL-GFX11-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX12,GISEL-GFX12-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX12,GISEL-GFX12-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -global-isel -new-reg-bank-select < %s | FileCheck -check-prefixes=GISEL-VI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel -new-reg-bank-select < %s | FileCheck -check-prefixes=GISEL-GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -mattr=+real-true16 -global-isel -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX11,GISEL-GFX11,GISEL-GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -mattr=-real-true16 -global-isel -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX11,GISEL-GFX11,GISEL-GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -global-isel -new-reg-bank-select < %s | FileCheck -check-prefixes=GISEL-GFX12,GISEL-GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -global-isel -new-reg-bank-select < %s | FileCheck -check-prefixes=GISEL-GFX12,GISEL-GFX12-FAKE16 %s
; <GFX9 has no V_SAT_PK, GFX9+ has V_SAT_PK, GFX11 has V_SAT_PK with t16
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll
index b5d9d00c48045..8d0e00383d692 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll
@@ -1,21 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; FIXME-TRUE16. enable gisel
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
define i8 @test_vector_reduce_smax_v2i8(<2 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v2i8:
@@ -1632,6 +1632,7 @@ entry:
ret i8 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v2i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1678,7 +1679,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
; GFX9-GISEL-LABEL: test_vector_reduce_smax_v2i16:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1692,7 +1693,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
; GFX10-GISEL-LABEL: test_vector_reduce_smax_v2i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1713,7 +1714,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
; GFX11-GISEL-LABEL: test_vector_reduce_smax_v2i16:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1747,7 +1748,7 @@ define i16 @test_vector_reduce_smax_v2i16(<2 x i16> %v) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1900,6 +1901,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v4i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1961,7 +1963,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1977,7 +1979,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2003,7 +2005,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2041,7 +2043,7 @@ define i16 @test_vector_reduce_smax_v4i16(<4 x i16> %v) {
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -2049,6 +2051,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2139,7 +2142,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
; GFX9-GISEL-NEXT: s_nop 0
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2159,7 +2162,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2192,7 +2195,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
; GFX11-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2238,7 +2241,7 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
; GFX12-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2247,6 +2250,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2391,7 +2395,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
; GFX9-GISEL-NEXT: s_nop 0
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2419,7 +2423,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2467,7 +2471,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
; GFX11-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2528,7 +2532,7 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
; GFX12-GISEL-NEXT: v_pk_max_i16 v1, v1, v3
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll
index 2a989ecd2ebad..f15ecf014ab0b 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll
@@ -1,21 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; FIXME-TRUE16. enable gisel
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
define i8 @test_vector_reduce_smin_v2i8(<2 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v2i8:
@@ -1632,6 +1632,7 @@ entry:
ret i8 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v2i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1678,7 +1679,7 @@ define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
; GFX9-GISEL-LABEL: test_vector_reduce_smin_v2i16:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1692,7 +1693,7 @@ define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
; GFX10-GISEL-LABEL: test_vector_reduce_smin_v2i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1713,7 +1714,7 @@ define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
; GFX11-GISEL-LABEL: test_vector_reduce_smin_v2i16:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1747,7 +1748,7 @@ define i16 @test_vector_reduce_smin_v2i16(<2 x i16> %v) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1900,6 +1901,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v4i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1961,7 +1963,7 @@ define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1977,7 +1979,7 @@ define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2003,7 +2005,7 @@ define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2041,7 +2043,7 @@ define i16 @test_vector_reduce_smin_v4i16(<4 x i16> %v) {
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -2049,6 +2051,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2139,7 +2142,7 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
; GFX9-GISEL-NEXT: s_nop 0
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2159,7 +2162,7 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2192,7 +2195,7 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
; GFX11-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2238,7 +2241,7 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
; GFX12-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2391,7 +2394,7 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
; GFX9-GISEL-NEXT: s_nop 0
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2419,7 +2422,7 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2467,7 +2470,7 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
; GFX11-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2528,7 +2531,7 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
; GFX12-GISEL-NEXT: v_pk_min_i16 v1, v1, v3
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_i16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll
index 69fd58aadfbcc..e62165cb933c5 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-umax.ll
@@ -1,21 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; FIXME-TRUE16. enable gisel
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
define i8 @test_vector_reduce_umax_v2i8(<2 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v2i8:
@@ -1525,6 +1525,7 @@ entry:
ret i8 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_umax_v2i16(<2 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v2i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1569,7 +1570,7 @@ define i16 @test_vector_reduce_umax_v2i16(<2 x i16> %v) {
; GFX9-GISEL-LABEL: test_vector_reduce_umax_v2i16:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1583,7 +1584,7 @@ define i16 @test_vector_reduce_umax_v2i16(<2 x i16> %v) {
; GFX10-GISEL-LABEL: test_vector_reduce_umax_v2i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1604,7 +1605,7 @@ define i16 @test_vector_reduce_umax_v2i16(<2 x i16> %v) {
; GFX11-GISEL-LABEL: test_vector_reduce_umax_v2i16:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1638,7 +1639,7 @@ define i16 @test_vector_reduce_umax_v2i16(<2 x i16> %v) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1782,6 +1783,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_umax_v4i16(<4 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v4i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1841,7 +1843,7 @@ define i16 @test_vector_reduce_umax_v4i16(<4 x i16> %v) {
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1857,7 +1859,7 @@ define i16 @test_vector_reduce_umax_v4i16(<4 x i16> %v) {
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1883,7 +1885,7 @@ define i16 @test_vector_reduce_umax_v4i16(<4 x i16> %v) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1921,7 +1923,7 @@ define i16 @test_vector_reduce_umax_v4i16(<4 x i16> %v) {
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -1929,6 +1931,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_umax_v8i16(<8 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2017,7 +2020,7 @@ define i16 @test_vector_reduce_umax_v8i16(<8 x i16> %v) {
; GFX9-GISEL-NEXT: s_nop 0
; GFX9-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2037,7 +2040,7 @@ define i16 @test_vector_reduce_umax_v8i16(<8 x i16> %v) {
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_max_u16 v1, v1, v3
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2070,7 +2073,7 @@ define i16 @test_vector_reduce_umax_v8i16(<8 x i16> %v) {
; GFX11-GISEL-NEXT: v_pk_max_u16 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2116,7 +2119,7 @@ define i16 @test_vector_reduce_umax_v8i16(<8 x i16> %v) {
; GFX12-GISEL-NEXT: v_pk_max_u16 v1, v1, v3
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2125,6 +2128,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_umax_v16i16(<16 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2267,7 +2271,7 @@ define i16 @test_vector_reduce_umax_v16i16(<16 x i16> %v) {
; GFX9-GISEL-NEXT: s_nop 0
; GFX9-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2295,7 +2299,7 @@ define i16 @test_vector_reduce_umax_v16i16(<16 x i16> %v) {
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_max_u16 v1, v1, v3
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2343,7 +2347,7 @@ define i16 @test_vector_reduce_umax_v16i16(<16 x i16> %v) {
; GFX11-GISEL-NEXT: v_pk_max_u16 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2404,7 +2408,7 @@ define i16 @test_vector_reduce_umax_v16i16(<16 x i16> %v) {
; GFX12-GISEL-NEXT: v_pk_max_u16 v1, v1, v3
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_max_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll
index 1d3b42ee43b0f..83ecaaa7e0846 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-umin.ll
@@ -1,21 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; FIXME-TRUE16. enable gisel
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
-; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
+; XUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
define i8 @test_vector_reduce_umin_v2i8(<2 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v2i8:
@@ -1271,6 +1271,7 @@ entry:
ret i8 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_umin_v2i16(<2 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v2i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1312,7 +1313,7 @@ define i16 @test_vector_reduce_umin_v2i16(<2 x i16> %v) {
; GFX9-GISEL-LABEL: test_vector_reduce_umin_v2i16:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1326,7 +1327,7 @@ define i16 @test_vector_reduce_umin_v2i16(<2 x i16> %v) {
; GFX10-GISEL-LABEL: test_vector_reduce_umin_v2i16:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1347,7 +1348,7 @@ define i16 @test_vector_reduce_umin_v2i16(<2 x i16> %v) {
; GFX11-GISEL-LABEL: test_vector_reduce_umin_v2i16:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1381,7 +1382,7 @@ define i16 @test_vector_reduce_umin_v2i16(<2 x i16> %v) {
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1527,6 +1528,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_umin_v4i16(<4 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v4i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1583,7 +1585,7 @@ define i16 @test_vector_reduce_umin_v4i16(<4 x i16> %v) {
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1599,7 +1601,7 @@ define i16 @test_vector_reduce_umin_v4i16(<4 x i16> %v) {
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1625,7 +1627,7 @@ define i16 @test_vector_reduce_umin_v4i16(<4 x i16> %v) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1663,7 +1665,7 @@ define i16 @test_vector_reduce_umin_v4i16(<4 x i16> %v) {
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -1671,6 +1673,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_umin_v8i16(<8 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -1756,7 +1759,7 @@ define i16 @test_vector_reduce_umin_v8i16(<8 x i16> %v) {
; GFX9-GISEL-NEXT: s_nop 0
; GFX9-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1776,7 +1779,7 @@ define i16 @test_vector_reduce_umin_v8i16(<8 x i16> %v) {
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_min_u16 v1, v1, v3
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -1809,7 +1812,7 @@ define i16 @test_vector_reduce_umin_v8i16(<8 x i16> %v) {
; GFX11-GISEL-NEXT: v_pk_min_u16 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1855,7 +1858,7 @@ define i16 @test_vector_reduce_umin_v8i16(<8 x i16> %v) {
; GFX12-GISEL-NEXT: v_pk_min_u16 v1, v1, v3
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1864,6 +1867,7 @@ entry:
ret i16 %res
}
+; FIXME: With -new-reg-bank-select, v_alignbit_b32 is regression. Need pattern to look through COPY.
define i16 @test_vector_reduce_umin_v16i16(<16 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i16:
; GFX7-SDAG: ; %bb.0: ; %entry
@@ -2003,7 +2007,7 @@ define i16 @test_vector_reduce_umin_v16i16(<16 x i16> %v) {
; GFX9-GISEL-NEXT: s_nop 0
; GFX9-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_nop 0
-; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX9-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2031,7 +2035,7 @@ define i16 @test_vector_reduce_umin_v16i16(<16 x i16> %v) {
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v2
; GFX10-GISEL-NEXT: v_pk_min_u16 v1, v1, v3
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX10-GISEL-NEXT: v_alignbit_b32 v1, s4, v0, 16
; GFX10-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2079,7 +2083,7 @@ define i16 @test_vector_reduce_umin_v16i16(<16 x i16> %v) {
; GFX11-GISEL-NEXT: v_pk_min_u16 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2140,7 +2144,7 @@ define i16 @test_vector_reduce_umin_v16i16(<16 x i16> %v) {
; GFX12-GISEL-NEXT: v_pk_min_u16 v1, v1, v3
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
-; GFX12-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-GISEL-NEXT: v_alignbit_b32 v1, s0, v0, 16
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_pk_min_u16 v0, v0, v1
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
More information about the llvm-commits
mailing list