[llvm] [GlobalISel] Fold G_ICMP if possible (PR #86357)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 28 07:18:26 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/86357
From f462e855a0164a9af5e4e99b8367d3ddd0e5ed91 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 28 Mar 2024 10:18:08 -0400
Subject: [PATCH] [GlobalISel] Fold G_ICMP if possible
This patch constant folds `G_ICMP` in `CSEMIRBuilder::buildInstr` when both
operands are known integer constants (scalar, or element-wise when both
operands are `G_BUILD_VECTOR`s of constants), via a new `ConstantFoldICmp`
helper in the GlobalISel `Utils`. The affected `LegalizerHelper` lowerings
(`G_ATOMIC_CMPXCHG_WITH_SUCCESS` and `lowerSADDO_SSUBO`) now build their
intermediate results into cloned virtual registers and copy them back to the
original destinations, and the AMDGPU legalizer tests are updated accordingly.
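
As an illustration (a hand-written MIR sketch, not taken from the tests in
this patch): when the CSE builder is asked to build a compare of two known
constants, it can now emit the folded result directly instead of a `G_ICMP`:

  ; both operands are known constants
  %0:_(s32) = G_CONSTANT i32 2
  %1:_(s32) = G_CONSTANT i32 3
  ; previously kept as a compare:
  ;   %2:_(s1) = G_ICMP intpred(slt), %0(s32), %1(s32)
  ; with this patch the builder produces:
  %2:_(s1) = G_CONSTANT i1 true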
---
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 4 ++
llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 14 ++++
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 23 +++++--
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 68 +++++++++++++++++++
.../legalize-atomic-cmpxchg-with-success.mir | 20 ++++--
.../AMDGPU/GlobalISel/legalize-saddo.mir | 27 +++++---
.../AMDGPU/GlobalISel/legalize-saddsat.mir | 54 ++++++++++-----
.../AMDGPU/GlobalISel/legalize-ssubo.mir | 27 +++++---
.../AMDGPU/GlobalISel/legalize-ssubsat.mir | 54 ++++++++++-----
9 files changed, 226 insertions(+), 65 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 0241ec4f2111d0..807cec3c177d9f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -315,6 +315,10 @@ std::optional<SmallVector<unsigned>>
ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
std::function<unsigned(APInt)> CB);
+std::optional<SmallVector<APInt>>
+ConstantFoldICmp(unsigned Pred, const Register Op1, const Register Op2,
+ const MachineRegisterInfo &MRI);
+
/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit is
/// set.
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index a0bc325c6cda7b..551ba1e6036c17 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -174,6 +174,20 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
switch (Opc) {
default:
break;
+ case TargetOpcode::G_ICMP: {
+ assert(SrcOps.size() == 3 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ LLT SrcTy = SrcOps[1].getLLTTy(*getMRI());
+
+ if (std::optional<SmallVector<APInt>> Cst =
+ ConstantFoldICmp(SrcOps[0].getPredicate(), SrcOps[1].getReg(),
+ SrcOps[2].getReg(), *getMRI())) {
+ if (SrcTy.isVector())
+ return buildBuildVectorConstant(DstOps[0], *Cst);
+ return buildConstant(DstOps[0], Cst->front());
+ }
+ break;
+ }
case TargetOpcode::G_ADD:
case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_AND:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 4981d7b80b0b22..5d968a365dada2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3768,9 +3768,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
- MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
+ Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
+ Register NewSuccessRes = MRI.cloneVirtualRegister(SuccessRes);
+ MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
**MI.memoperands_begin());
- MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, NewSuccessRes, NewOldValRes, CmpVal);
+ MIRBuilder.buildCopy(OldValRes, NewOldValRes);
+ MIRBuilder.buildCopy(SuccessRes, NewSuccessRes);
MI.eraseFromParent();
return Legalized;
}
@@ -7657,10 +7661,13 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
LLT Ty = Dst0Ty;
LLT BoolTy = Dst1Ty;
+ Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
+ Register NewDst1 = MRI.cloneVirtualRegister(Dst1);
+
if (IsAdd)
- MIRBuilder.buildAdd(Dst0, LHS, RHS);
+ MIRBuilder.buildAdd(NewDst0, LHS, RHS);
else
- MIRBuilder.buildSub(Dst0, LHS, RHS);
+ MIRBuilder.buildSub(NewDst0, LHS, RHS);
// TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
@@ -7673,12 +7680,16 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
// otherwise there will be overflow.
auto ResultLowerThanLHS =
- MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
auto ConditionRHS = MIRBuilder.buildICmp(
IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
- MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
+ MIRBuilder.buildXor(NewDst1, ConditionRHS, ResultLowerThanLHS);
+
+ MIRBuilder.buildCopy(Dst0, NewDst0);
+ MIRBuilder.buildCopy(Dst1, NewDst1);
MI.eraseFromParent();
+
return Legalized;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 8c41f8b1bdcdbc..c3bc3203b63605 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -997,6 +997,74 @@ llvm::ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
return std::nullopt;
}
+std::optional<SmallVector<APInt>>
+llvm::ConstantFoldICmp(unsigned Pred, const Register Op1, const Register Op2,
+ const MachineRegisterInfo &MRI) {
+ LLT Ty = MRI.getType(Op1);
+ if (Ty != MRI.getType(Op2))
+ return std::nullopt;
+
+ auto TryFoldScalar = [&MRI, Pred](Register LHS,
+ Register RHS) -> std::optional<APInt> {
+ auto LHSCst = getIConstantVRegVal(LHS, MRI);
+ auto RHSCst = getIConstantVRegVal(RHS, MRI);
+ if (!LHSCst || !RHSCst)
+ return std::nullopt;
+
+ switch (Pred) {
+ case CmpInst::Predicate::ICMP_EQ:
+ return APInt(/*numBits=*/1, LHSCst->eq(*RHSCst));
+ case CmpInst::Predicate::ICMP_NE:
+ return APInt(/*numBits=*/1, LHSCst->ne(*RHSCst));
+ case CmpInst::Predicate::ICMP_UGT:
+ return APInt(/*numBits=*/1, LHSCst->ugt(*RHSCst));
+ case CmpInst::Predicate::ICMP_UGE:
+ return APInt(/*numBits=*/1, LHSCst->uge(*RHSCst));
+ case CmpInst::Predicate::ICMP_ULT:
+ return APInt(/*numBits=*/1, LHSCst->ult(*RHSCst));
+ case CmpInst::Predicate::ICMP_ULE:
+ return APInt(/*numBits=*/1, LHSCst->ule(*RHSCst));
+ case CmpInst::Predicate::ICMP_SGT:
+ return APInt(/*numBits=*/1, LHSCst->sgt(*RHSCst));
+ case CmpInst::Predicate::ICMP_SGE:
+ return APInt(/*numBits=*/1, LHSCst->sge(*RHSCst));
+ case CmpInst::Predicate::ICMP_SLT:
+ return APInt(/*numBits=*/1, LHSCst->slt(*RHSCst));
+ case CmpInst::Predicate::ICMP_SLE:
+ return APInt(/*numBits=*/1, LHSCst->sle(*RHSCst));
+ default:
+ return std::nullopt;
+ }
+ };
+
+ SmallVector<APInt> FoldedICmps;
+
+ if (Ty.isVector()) {
+ // Try to constant fold each element.
+ auto *BV1 = getOpcodeDef<GBuildVector>(Op1, MRI);
+ auto *BV2 = getOpcodeDef<GBuildVector>(Op2, MRI);
+ if (!BV1 || !BV2)
+ return std::nullopt;
+ assert(BV1->getNumSources() == BV2->getNumSources() && "Invalid vectors");
+ for (unsigned I = 0; I < BV1->getNumSources(); ++I) {
+ if (auto MaybeFold =
+ TryFoldScalar(BV1->getSourceReg(I), BV2->getSourceReg(I))) {
+ FoldedICmps.emplace_back(*MaybeFold);
+ continue;
+ }
+ return std::nullopt;
+ }
+ return FoldedICmps;
+ }
+
+ if (auto MaybeCst = TryFoldScalar(Op1, Op2)) {
+ FoldedICmps.emplace_back(*MaybeCst);
+ return FoldedICmps;
+ }
+
+ return std::nullopt;
+}
+
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB) {
std::optional<DefinitionAndSourceRegister> DefSrcReg =
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
index e288d9d5ab3c09..e6b2ebf4f6b403 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
@@ -16,7 +16,9 @@ body: |
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[COPY4]](s1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = COPY $vgpr3
@@ -40,7 +42,9 @@ body: |
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32))
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[COPY4]](s1)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = COPY $vgpr3
@@ -63,7 +67,9 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ATOMIC_CMPXCHG]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[COPY4]](s1)
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
@@ -87,7 +93,9 @@ body: |
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64)
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s64)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s64), implicit [[COPY4]](s1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = COPY $vgpr4_vgpr5
@@ -110,7 +118,9 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[ATOMIC_CMPXCHG]](s64)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s64), implicit [[COPY4]](s1)
%0:_(p3) = COPY $vgpr0
%1:_(s64) = COPY $vgpr1_vgpr2
%2:_(s64) = COPY $vgpr3_vgpr4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
index dba20e128237cd..9e14cb4c24e469 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
@@ -21,9 +21,10 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[COPY2]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
%0:_(s32) = COPY $vgpr0
@@ -56,7 +57,8 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[C]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY2]](s1)
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
%0:_(s32) = COPY $vgpr0
@@ -86,8 +88,10 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[COPY]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
- ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
@@ -117,8 +121,10 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](s64)
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
@@ -172,11 +178,12 @@ body: |
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY [[BITCAST2]](<2 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32)
- ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](<2 x s16>)
; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
@@ -360,13 +367,14 @@ body: |
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1)
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s16>) = COPY [[CONCAT_VECTORS]](<4 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY5]](<4 x s16>)
; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr1_vgpr2
@@ -403,11 +411,12 @@ body: |
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x s32>)
; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
index 93d00714158be5..10eb6d250bd33d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
@@ -955,15 +955,17 @@ body: |
; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]]
; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
;
; GFX8-LABEL: name: saddsat_s64
@@ -980,15 +982,17 @@ body: |
; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]]
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
;
; GFX9-LABEL: name: saddsat_s64
@@ -1005,15 +1009,17 @@ body: |
; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]]
; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
@@ -1043,15 +1049,17 @@ body: |
; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]]
; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]]
; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]]
@@ -1060,13 +1068,15 @@ body: |
; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]]
; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]]
; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]]
- ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32)
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s1) = COPY [[XOR1]](s1)
+ ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY4]], [[C1]](s32)
; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]]
; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]]
; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32)
- ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]]
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[COPY5]](s1), [[MV3]], [[COPY4]]
; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
;
@@ -1086,15 +1096,17 @@ body: |
; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]]
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]]
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]]
@@ -1103,13 +1115,15 @@ body: |
; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]]
; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]]
; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]]
- ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32)
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s1) = COPY [[XOR1]](s1)
+ ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY4]], [[C1]](s32)
; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]]
; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]]
; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32)
- ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]]
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[COPY5]](s1), [[MV3]], [[COPY4]]
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
;
@@ -1129,15 +1143,17 @@ body: |
; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]]
; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]]
; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]]
@@ -1146,13 +1162,15 @@ body: |
; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]]
; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]]
; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]]
- ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s1) = COPY [[XOR1]](s1)
+ ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY4]], [[C1]](s32)
; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]]
; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]]
; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32)
- ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]]
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[COPY5]](s1), [[MV3]], [[COPY4]]
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
index 57b1ab9b194ec5..74a3525dfe514c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
@@ -21,9 +21,10 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[COPY2]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
%0:_(s32) = COPY $vgpr0
@@ -56,7 +57,8 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[C]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY2]](s1)
; CHECK-NEXT: $vgpr0 = COPY [[SUB]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
%0:_(s32) = COPY $vgpr0
@@ -86,8 +88,10 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[COPY]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s32), [[C]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
- ; CHECK-NEXT: $vgpr0 = COPY [[SUB]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
@@ -117,8 +121,10 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]]
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](s64)
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
@@ -172,11 +178,12 @@ body: |
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY [[BITCAST2]](<2 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32)
- ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](<2 x s16>)
; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
@@ -360,13 +367,14 @@ body: |
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1)
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s16>) = COPY [[CONCAT_VECTORS]](<4 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY5]](<4 x s16>)
; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr1_vgpr2
@@ -403,11 +411,12 @@ body: |
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x s32>)
; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
index 33a8cda8e84b3e..f77c23e491ae8c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
@@ -955,15 +955,17 @@ body: |
; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]]
; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
;
; GFX8-LABEL: name: ssubsat_s64
@@ -980,15 +982,17 @@ body: |
; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]]
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
;
; GFX9-LABEL: name: ssubsat_s64
@@ -1005,15 +1009,17 @@ body: |
; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]]
; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
@@ -1043,15 +1049,17 @@ body: |
; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]]
; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]]
; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]]
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]]
@@ -1060,13 +1068,15 @@ body: |
; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]]
; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]]
; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]]
- ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32)
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s1) = COPY [[XOR1]](s1)
+ ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY4]], [[C1]](s32)
; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]]
; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]]
; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]]
+ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[COPY5]](s1), [[MV3]], [[COPY4]]
; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
;
@@ -1086,15 +1096,17 @@ body: |
; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]]
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]]
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]]
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]]
@@ -1103,13 +1115,15 @@ body: |
; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]]
; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]]
; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]]
- ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32)
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s1) = COPY [[XOR1]](s1)
+ ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY4]], [[C1]](s32)
; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]]
; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]]
; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]]
+ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[COPY5]](s1), [[MV3]], [[COPY4]]
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
;
@@ -1129,15 +1143,17 @@ body: |
; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]]
; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]]
; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32)
+ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY2]], [[C1]](s32)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]]
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[COPY3]](s1), [[MV1]], [[COPY2]]
; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]]
@@ -1146,13 +1162,15 @@ body: |
; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]]
; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]]
; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]]
- ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s1) = COPY [[XOR1]](s1)
+ ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY4]], [[C1]](s32)
; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]]
; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]]
; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
- ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]]
+ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[COPY5]](s1), [[MV3]], [[COPY4]]
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3