[llvm] [GISel][RISCV][AArch64] Support legalizing G_SCMP/G_UCMP to sub(isgt,islt). (PR #119265)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 13:59:32 PST 2024
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/119265
From f7b9e3ff32f41847df0c1197c11fdcaa1f74274c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 9 Dec 2024 12:00:30 -0800
Subject: [PATCH 1/2] [GISel][RISCV][AArch64] Support legalizing G_SCMP/G_UCMP
to sub(isgt, islt).
Convert the LLT to an EVT and call TargetLowering::shouldExpandCmpUsingSelects
to determine whether we should do this.

We don't have a getSetccResultType, so I'm boolean-extending the compares
to the result type and using that. If the compares legalize to the same
type, these extends will get removed. Unfortunately, if the compares
legalize to a different type, we end up with truncates or extends that
might not be optimally placed. I wonder if we can work around this by
adding widening/narrowing rules for G_SCMP/G_UCMP before lowering?
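
As an illustration, here is a minimal sketch of the select-free expansion
(simplified from the LegalizerHelper change below; LHS/RHS stand in for
Cmp->getLHSReg()/Cmp->getRHSReg(), and the explicit boolean-contents
comparison reflects the follow-up fixup rather than this first commit):

  // cmp(x, y) == (x > y) - (x < y) once both i1 compares are
  // boolean-extended to the destination type.
  auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, LHS, RHS);
  auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, LHS, RHS);
  // With 0/-1 booleans the extended compares are negated, so swap the
  // operands to keep the result in {-1, 0, +1}.
  if (TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false) ==
      TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
    std::swap(IsGT, IsLT);
  unsigned BoolExtOp =
      MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
  IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
  IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
  MIRBuilder.buildSub(Dst, IsGT, IsLT);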
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 25 +-
.../GlobalISel/legalize-threeway-cmp.mir | 53 +--
llvm/test/CodeGen/AArch64/scmp.ll | 206 ++++--------
llvm/test/CodeGen/AArch64/ucmp.ll | 213 ++++--------
.../legalizer/legalize-threeway-cmp-rv32.mir | 16 +-
.../legalizer/legalize-threeway-cmp-rv64.mir | 26 +-
llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll | 237 +++----------
llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll | 310 ++++--------------
8 files changed, 284 insertions(+), 802 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cf835ad187f818..1f414169db022c 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7920,6 +7920,7 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
Register Dst = Cmp->getReg(0);
LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Cmp->getReg(1));
LLT CmpTy = DstTy.changeElementSize(1);
CmpInst::Predicate LTPredicate = Cmp->isSigned()
@@ -7929,16 +7930,30 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
? CmpInst::Predicate::ICMP_SGT
: CmpInst::Predicate::ICMP_UGT;
- auto One = MIRBuilder.buildConstant(DstTy, 1);
auto Zero = MIRBuilder.buildConstant(DstTy, 0);
auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
Cmp->getRHSReg());
- auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
-
- auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
Cmp->getRHSReg());
- MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
+
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if (TLI.shouldExpandCmpUsingSelects(
+ getApproximateEVTForLLT(SrcTy, DL, Ctx))) {
+ auto One = MIRBuilder.buildConstant(DstTy, 1);
+ auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
+
+ auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
+ MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
+ } else {
+ if (TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false))
+ std::swap(IsGT, IsLT);
+ unsigned BoolExtOp =
+ MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
+ IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
+ IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
+ MIRBuilder.buildSub(Dst, IsGT, IsLT);
+ }
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
index e69f79bdd187a3..27d17310c24e9f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
@@ -8,10 +8,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2
@@ -31,10 +31,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2
@@ -61,42 +61,13 @@ body: |
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ugt), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[UV]]
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV2]], [[TRUNC1]]
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV4]], [[XOR]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ult), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s16>), [[UV7:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>)
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC2]], [[UV6]]
- ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR6]](<8 x s8>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT4]](<8 x s16>)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV8]], [[TRUNC3]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[XOR1]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND2]], [[AND3]]
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR1]](<4 x s16>)
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT5]], 2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[SUB]](<4 x s16>)
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT]], 2
; CHECK-NEXT: $q0 = COPY [[SEXT_INREG]](<4 x s32>)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
@@ -125,15 +96,15 @@ body: |
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[ICMP5]], [[ICMP3]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[ICMP5]], [[ICMP3]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT2]](s32), [[C2]], [[SELECT1]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT1]](s32), [[C2]], [[SELECT2]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT3]], 2
; CHECK-NEXT: $w0 = COPY [[SEXT_INREG]](s32)
%0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/scmp.ll b/llvm/test/CodeGen/AArch64/scmp.ll
index 4aff5a836e1a18..9930dea4e37e2f 100644
--- a/llvm/test/CodeGen/AArch64/scmp.ll
+++ b/llvm/test/CodeGen/AArch64/scmp.ll
@@ -85,14 +85,13 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; CHECK-GI-NEXT: cset w9, hi
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
-; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: cset w8, ne
-; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w9, lt
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w10, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w9, w10, w9, eq
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: tst w9, #0x1
; CHECK-GI-NEXT: csinv w0, w8, wzr, eq
; CHECK-GI-NEXT: ret
@@ -134,88 +133,48 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
}
define <8 x i8> @s_v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-SD-LABEL: s_v8i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v8i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8b, #1
-; CHECK-GI-NEXT: cmgt v3.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.8b, v0.8b, v1.8b
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%c = call <8 x i8> @llvm.scmp(<8 x i8> %a, <8 x i8> %b)
ret <8 x i8> %c
}
define <16 x i8> @s_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SD-LABEL: s_v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: cmgt v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: cmgt v3.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%c = call <16 x i8> @llvm.scmp(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %c
}
define <4 x i16> @s_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-SD-LABEL: s_v4i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT: cmgt v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v4i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4h, #1
-; CHECK-GI-NEXT: cmgt v3.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.4h, v0.4h, v1.4h
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%c = call <4 x i16> @llvm.scmp(<4 x i16> %a, <4 x i16> %b)
ret <4 x i16> %c
}
define <8 x i16> @s_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: s_v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.8h, v0.8h, v1.8h
-; CHECK-SD-NEXT: cmgt v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8h, #1
-; CHECK-GI-NEXT: cmgt v3.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.8h, v1.8h, v0.8h
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.8h, v0.8h, v1.8h
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%c = call <8 x i16> @llvm.scmp(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %c
@@ -234,16 +193,12 @@ define <16 x i16> @s_v16i16(<16 x i16> %a, <16 x i16> %b) {
;
; CHECK-GI-LABEL: s_v16i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.8h, #1
-; CHECK-GI-NEXT: cmgt v5.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT: cmgt v6.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v4.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: cmgt v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmgt v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmgt v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v5.8h
; CHECK-GI-NEXT: ret
entry:
%c = call <16 x i16> @llvm.scmp(<16 x i16> %a, <16 x i16> %b)
@@ -251,44 +206,24 @@ entry:
}
define <2 x i32> @s_v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-SD-LABEL: s_v2i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: cmgt v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v2i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.2s, #1
-; CHECK-GI-NEXT: cmgt v3.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%c = call <2 x i32> @llvm.scmp(<2 x i32> %a, <2 x i32> %b)
ret <2 x i32> %c
}
define <4 x i32> @s_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: s_v4i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v4i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4s, #1
-; CHECK-GI-NEXT: cmgt v3.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%c = call <4 x i32> @llvm.scmp(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %c
@@ -307,16 +242,12 @@ define <8 x i32> @s_v8i32(<8 x i32> %a, <8 x i32> %b) {
;
; CHECK-GI-LABEL: s_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.4s, #1
-; CHECK-GI-NEXT: cmgt v5.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: cmgt v6.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v4.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: cmgt v5.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: cmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: cmgt v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v5.4s
; CHECK-GI-NEXT: ret
entry:
%c = call <8 x i32> @llvm.scmp(<8 x i32> %a, <8 x i32> %b)
@@ -324,23 +255,12 @@ entry:
}
define <2 x i64> @s_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: s_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: cmgt v0.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v2i64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI16_0
-; CHECK-GI-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
-; CHECK-GI-NEXT: cmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: and v2.16b, v3.16b, v2.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: sub v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%c = call <2 x i64> @llvm.scmp(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
@@ -359,17 +279,12 @@ define <4 x i64> @s_v4i64(<4 x i64> %a, <4 x i64> %b) {
;
; CHECK-GI-LABEL: s_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI17_0
; CHECK-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-GI-NEXT: cmgt v6.2d, v1.2d, v3.2d
-; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI17_0]
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v5.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: cmgt v0.2d, v2.2d, v0.2d
; CHECK-GI-NEXT: cmgt v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: and v4.16b, v5.16b, v4.16b
-; CHECK-GI-NEXT: and v5.16b, v5.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v4.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v5.16b
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: sub v1.2d, v1.2d, v5.2d
; CHECK-GI-NEXT: ret
entry:
%c = call <4 x i64> @llvm.scmp(<4 x i64> %a, <4 x i64> %b)
@@ -394,16 +309,13 @@ define <16 x i8> @signOf_neon_scmp(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16>
; CHECK-GI-NEXT: cmgt v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmgt v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmgt v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: movi v3.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: uzp1 v4.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: shl v1.16b, v4.16b, #7
+; CHECK-GI-NEXT: uzp1 v1.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7
; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-GI-NEXT: bsl v0.16b, v3.16b, v1.16b
+; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%0 = shufflevector <8 x i16> %s0_lo, <8 x i16> %s0_hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/ucmp.ll b/llvm/test/CodeGen/AArch64/ucmp.ll
index 125ac7f61a41e5..b67dd958b29edd 100644
--- a/llvm/test/CodeGen/AArch64/ucmp.ll
+++ b/llvm/test/CodeGen/AArch64/ucmp.ll
@@ -85,14 +85,13 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
; CHECK-GI-NEXT: cset w9, hi
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
-; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: cset w8, ne
-; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w9, lo
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w10, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w9, w10, w9, eq
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: tst w9, #0x1
; CHECK-GI-NEXT: csinv w0, w8, wzr, eq
; CHECK-GI-NEXT: ret
@@ -157,16 +156,15 @@ define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) {
; CHECK-GI-NEXT: cset w11, hi
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: csel w10, w11, w10, eq
-; CHECK-GI-NEXT: tst w10, #0x1
-; CHECK-GI-NEXT: cset x10, ne
-; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: cset w11, lo
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w12, lo
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: csel w8, w12, w11, eq
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: cset x9, ne
; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: csinv x8, x10, xzr, eq
+; CHECK-GI-NEXT: csinv x8, x9, xzr, eq
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%1 = call <1 x i64> @llvm.ucmp(<1 x i65> %x, <1 x i65> %y)
@@ -174,88 +172,48 @@ define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) {
}
define <8 x i8> @u_v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-SD-LABEL: u_v8i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: cmhi v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v8i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8b, #1
-; CHECK-GI-NEXT: cmhi v3.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.8b, v0.8b, v1.8b
+; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%c = call <8 x i8> @llvm.ucmp(<8 x i8> %a, <8 x i8> %b)
ret <8 x i8> %c
}
define <16 x i8> @u_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SD-LABEL: u_v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: cmhi v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: cmhi v3.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%c = call <16 x i8> @llvm.ucmp(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %c
}
define <4 x i16> @u_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-SD-LABEL: u_v4i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT: cmhi v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v4i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4h, #1
-; CHECK-GI-NEXT: cmhi v3.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.4h, v0.4h, v1.4h
+; CHECK-NEXT: cmhi v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%c = call <4 x i16> @llvm.ucmp(<4 x i16> %a, <4 x i16> %b)
ret <4 x i16> %c
}
define <8 x i16> @u_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: u_v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.8h, v0.8h, v1.8h
-; CHECK-SD-NEXT: cmhi v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8h, #1
-; CHECK-GI-NEXT: cmhi v3.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.8h, v1.8h, v0.8h
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.8h, v0.8h, v1.8h
+; CHECK-NEXT: cmhi v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%c = call <8 x i16> @llvm.ucmp(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %c
@@ -274,16 +232,12 @@ define <16 x i16> @u_v16i16(<16 x i16> %a, <16 x i16> %b) {
;
; CHECK-GI-LABEL: u_v16i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.8h, #1
-; CHECK-GI-NEXT: cmhi v5.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT: cmhi v6.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmhi v4.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: cmhi v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmhi v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmhi v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v5.8h
; CHECK-GI-NEXT: ret
entry:
%c = call <16 x i16> @llvm.ucmp(<16 x i16> %a, <16 x i16> %b)
@@ -291,44 +245,24 @@ entry:
}
define <2 x i32> @u_v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-SD-LABEL: u_v2i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: cmhi v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v2i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.2s, #1
-; CHECK-GI-NEXT: cmhi v3.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: cmhi v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%c = call <2 x i32> @llvm.ucmp(<2 x i32> %a, <2 x i32> %b)
ret <2 x i32> %c
}
define <4 x i32> @u_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: u_v4i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v4i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4s, #1
-; CHECK-GI-NEXT: cmhi v3.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%c = call <4 x i32> @llvm.ucmp(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %c
@@ -347,16 +281,12 @@ define <8 x i32> @u_v8i32(<8 x i32> %a, <8 x i32> %b) {
;
; CHECK-GI-LABEL: u_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.4s, #1
-; CHECK-GI-NEXT: cmhi v5.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: cmhi v6.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmhi v4.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: cmhi v5.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: cmhi v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: cmhi v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v5.4s
; CHECK-GI-NEXT: ret
entry:
%c = call <8 x i32> @llvm.ucmp(<8 x i32> %a, <8 x i32> %b)
@@ -364,23 +294,12 @@ entry:
}
define <2 x i64> @u_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: u_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: cmhi v0.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v2i64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI17_0
-; CHECK-GI-NEXT: cmhi v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI17_0]
-; CHECK-GI-NEXT: cmhi v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: and v2.16b, v3.16b, v2.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: sub v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%c = call <2 x i64> @llvm.ucmp(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
@@ -399,17 +318,12 @@ define <4 x i64> @u_v4i64(<4 x i64> %a, <4 x i64> %b) {
;
; CHECK-GI-LABEL: u_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI18_0
; CHECK-GI-NEXT: cmhi v4.2d, v0.2d, v2.2d
-; CHECK-GI-NEXT: cmhi v6.2d, v1.2d, v3.2d
-; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI18_0]
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmhi v5.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: cmhi v0.2d, v2.2d, v0.2d
; CHECK-GI-NEXT: cmhi v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: and v4.16b, v5.16b, v4.16b
-; CHECK-GI-NEXT: and v5.16b, v5.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v4.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v5.16b
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: sub v1.2d, v1.2d, v5.2d
; CHECK-GI-NEXT: ret
entry:
%c = call <4 x i64> @llvm.ucmp(<4 x i64> %a, <4 x i64> %b)
@@ -434,16 +348,13 @@ define <16 x i8> @signOf_neon(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> %s1_
; CHECK-GI-NEXT: cmhi v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmhi v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmhi v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: movi v3.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: uzp1 v4.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: shl v1.16b, v4.16b, #7
+; CHECK-GI-NEXT: uzp1 v1.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7
; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-GI-NEXT: bsl v0.16b, v3.16b, v1.16b
+; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%0 = shufflevector <8 x i16> %s0_lo, <8 x i16> %s0_hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
index 5c3d7e5975f1fd..91743a4a8d0f86 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
@@ -12,14 +12,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
- ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: $x10 = COPY [[SUB]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = COPY $x10
%1:_(s32) = COPY $x11
@@ -40,14 +36,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
- ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: $x10 = COPY [[SUB]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = COPY $x10
%1:_(s32) = COPY $x11
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
index ccade88ffae7c8..e26e89d2ffdab4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
@@ -12,20 +12,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C1]](s64)
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s64), [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s64), [[TRUNC2]], [[SELECT]]
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SELECT1]](s32)
- ; CHECK-NEXT: $x10 = COPY [[SEXT]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32
+ ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG2]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%2:_(s64) = COPY $x10
%0:_(s32) = G_TRUNC %2(s64)
@@ -48,20 +41,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C1]](s64)
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s64), [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s64), [[TRUNC2]], [[SELECT]]
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SELECT1]](s32)
- ; CHECK-NEXT: $x10 = COPY [[SEXT]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32
+ ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG2]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%2:_(s64) = COPY $x10
%0:_(s32) = G_TRUNC %2(s64)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
index 0f2b6281b6f88c..c26e30721c8482 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
@@ -5,34 +5,16 @@
define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
; RV32I-LABEL: scmp.8.8:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: blt a1, a0, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: blt a2, a1, .LBB0_3
-; RV32I-NEXT: j .LBB0_4
-; RV32I-NEXT: .LBB0_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bge a2, a1, .LBB0_4
-; RV32I-NEXT: .LBB0_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB0_4:
+; RV32I-NEXT: slt a2, a1, a0
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.8:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB0_3
-; RV64I-NEXT: j .LBB0_4
-; RV64I-NEXT: .LBB0_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB0_4
-; RV64I-NEXT: .LBB0_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB0_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -41,34 +23,16 @@ define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
; RV32I-LABEL: scmp.8.16:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: blt a1, a0, .LBB1_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: blt a2, a1, .LBB1_3
-; RV32I-NEXT: j .LBB1_4
-; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bge a2, a1, .LBB1_4
-; RV32I-NEXT: .LBB1_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB1_4:
+; RV32I-NEXT: slt a2, a1, a0
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.16:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB1_3
-; RV64I-NEXT: j .LBB1_4
-; RV64I-NEXT: .LBB1_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB1_4
-; RV64I-NEXT: .LBB1_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB1_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -77,35 +41,18 @@ define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: scmp.8.32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: blt a1, a0, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: blt a2, a1, .LBB2_3
-; RV32I-NEXT: j .LBB2_4
-; RV32I-NEXT: .LBB2_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bge a2, a1, .LBB2_4
-; RV32I-NEXT: .LBB2_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB2_4:
+; RV32I-NEXT: slt a2, a1, a0
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: blt a1, a2, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB2_3
-; RV64I-NEXT: j .LBB2_4
-; RV64I-NEXT: .LBB2_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB2_4
-; RV64I-NEXT: .LBB2_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB2_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -117,42 +64,20 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: beq a1, a3, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a3, a1
-; RV32I-NEXT: bnez a4, .LBB3_3
-; RV32I-NEXT: j .LBB3_4
+; RV32I-NEXT: slt a0, a1, a3
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: beqz a4, .LBB3_4
-; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: li a4, 1
-; RV32I-NEXT: .LBB3_4:
-; RV32I-NEXT: beq a1, a3, .LBB3_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: slt a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB3_7
-; RV32I-NEXT: j .LBB3_8
-; RV32I-NEXT: .LBB3_6:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB3_8
-; RV32I-NEXT: .LBB3_7:
-; RV32I-NEXT: li a4, -1
-; RV32I-NEXT: .LBB3_8:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: sub a0, a0, a4
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB3_3
-; RV64I-NEXT: j .LBB3_4
-; RV64I-NEXT: .LBB3_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB3_4
-; RV64I-NEXT: .LBB3_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB3_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i64 %x, i64 %y)
ret i8 %1
@@ -161,35 +86,18 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: scmp.32.32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: blt a1, a0, .LBB4_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: blt a2, a1, .LBB4_3
-; RV32I-NEXT: j .LBB4_4
-; RV32I-NEXT: .LBB4_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bge a2, a1, .LBB4_4
-; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB4_4:
+; RV32I-NEXT: slt a2, a1, a0
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.32.32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: blt a1, a2, .LBB4_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB4_3
-; RV64I-NEXT: j .LBB4_4
-; RV64I-NEXT: .LBB4_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB4_4
-; RV64I-NEXT: .LBB4_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB4_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
@@ -201,42 +109,20 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: beq a1, a3, .LBB5_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a3, a1
-; RV32I-NEXT: bnez a4, .LBB5_3
-; RV32I-NEXT: j .LBB5_4
+; RV32I-NEXT: slt a0, a1, a3
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB5_2:
; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: beqz a4, .LBB5_4
-; RV32I-NEXT: .LBB5_3:
-; RV32I-NEXT: li a4, 1
-; RV32I-NEXT: .LBB5_4:
-; RV32I-NEXT: beq a1, a3, .LBB5_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: slt a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB5_7
-; RV32I-NEXT: j .LBB5_8
-; RV32I-NEXT: .LBB5_6:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB5_8
-; RV32I-NEXT: .LBB5_7:
-; RV32I-NEXT: li a4, -1
-; RV32I-NEXT: .LBB5_8:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: sub a0, a0, a4
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.32.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB5_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB5_3
-; RV64I-NEXT: j .LBB5_4
-; RV64I-NEXT: .LBB5_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB5_4
-; RV64I-NEXT: .LBB5_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB5_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i32 @llvm.scmp(i64 %x, i64 %y)
ret i32 %1
@@ -245,46 +131,25 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: scmp.64.64:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a4, a0
; RV32I-NEXT: beq a1, a3, .LBB6_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slt a0, a3, a1
-; RV32I-NEXT: bnez a0, .LBB6_3
-; RV32I-NEXT: j .LBB6_4
+; RV32I-NEXT: slt a4, a3, a1
+; RV32I-NEXT: slt a1, a1, a3
+; RV32I-NEXT: j .LBB6_3
; RV32I-NEXT: .LBB6_2:
-; RV32I-NEXT: sltu a0, a2, a4
-; RV32I-NEXT: beqz a0, .LBB6_4
+; RV32I-NEXT: sltu a4, a2, a0
+; RV32I-NEXT: sltu a1, a0, a2
; RV32I-NEXT: .LBB6_3:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: .LBB6_4:
-; RV32I-NEXT: beq a1, a3, .LBB6_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: slt a1, a1, a3
-; RV32I-NEXT: bnez a1, .LBB6_7
-; RV32I-NEXT: j .LBB6_8
-; RV32I-NEXT: .LBB6_6:
-; RV32I-NEXT: sltu a1, a4, a2
-; RV32I-NEXT: beqz a1, .LBB6_8
-; RV32I-NEXT: .LBB6_7:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: li a1, -1
-; RV32I-NEXT: .LBB6_8:
+; RV32I-NEXT: sub a0, a1, a4
+; RV32I-NEXT: sltu a1, a1, a4
+; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.64.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB6_3
-; RV64I-NEXT: j .LBB6_4
-; RV64I-NEXT: .LBB6_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB6_4
-; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB6_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i64 @llvm.scmp(i64 %x, i64 %y)
ret i64 %1
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
index e2a95eb974342d..ff24aaaa90931d 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
@@ -5,34 +5,16 @@
define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
; RV32I-LABEL: ucmp.8.8:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB0_3
-; RV32I-NEXT: j .LBB0_4
-; RV32I-NEXT: .LBB0_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB0_4
-; RV32I-NEXT: .LBB0_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB0_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.8:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB0_3
-; RV64I-NEXT: j .LBB0_4
-; RV64I-NEXT: .LBB0_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB0_4
-; RV64I-NEXT: .LBB0_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB0_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -41,34 +23,16 @@ define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
; RV32I-LABEL: ucmp.8.16:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB1_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB1_3
-; RV32I-NEXT: j .LBB1_4
-; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB1_4
-; RV32I-NEXT: .LBB1_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB1_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.16:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB1_3
-; RV64I-NEXT: j .LBB1_4
-; RV64I-NEXT: .LBB1_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB1_4
-; RV64I-NEXT: .LBB1_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB1_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -77,37 +41,20 @@ define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: ucmp.8.32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB2_3
-; RV32I-NEXT: j .LBB2_4
-; RV32I-NEXT: .LBB2_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB2_4
-; RV32I-NEXT: .LBB2_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB2_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: srli a2, a2, 32
-; RV64I-NEXT: bltu a2, a1, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a1, a2, .LBB2_3
-; RV64I-NEXT: j .LBB2_4
-; RV64I-NEXT: .LBB2_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a1, a2, .LBB2_4
-; RV64I-NEXT: .LBB2_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB2_4:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -119,42 +66,20 @@ define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: beq a1, a3, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a3, a1
-; RV32I-NEXT: bnez a4, .LBB3_3
-; RV32I-NEXT: j .LBB3_4
+; RV32I-NEXT: sltu a0, a1, a3
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: beqz a4, .LBB3_4
-; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: li a4, 1
-; RV32I-NEXT: .LBB3_4:
-; RV32I-NEXT: beq a1, a3, .LBB3_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB3_7
-; RV32I-NEXT: j .LBB3_8
-; RV32I-NEXT: .LBB3_6:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB3_8
-; RV32I-NEXT: .LBB3_7:
-; RV32I-NEXT: li a4, -1
-; RV32I-NEXT: .LBB3_8:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: sub a0, a0, a4
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB3_3
-; RV64I-NEXT: j .LBB3_4
-; RV64I-NEXT: .LBB3_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB3_4
-; RV64I-NEXT: .LBB3_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB3_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
ret i8 %1
@@ -163,37 +88,20 @@ define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: ucmp.32.32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB4_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB4_3
-; RV32I-NEXT: j .LBB4_4
-; RV32I-NEXT: .LBB4_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB4_4
-; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB4_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: srli a2, a2, 32
-; RV64I-NEXT: bltu a2, a1, .LBB4_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a1, a2, .LBB4_3
-; RV64I-NEXT: j .LBB4_4
-; RV64I-NEXT: .LBB4_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a1, a2, .LBB4_4
-; RV64I-NEXT: .LBB4_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB4_4:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -202,37 +110,20 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
; RV32I-LABEL: ucmp.32.32_sext:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB5_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB5_3
-; RV32I-NEXT: j .LBB5_4
-; RV32I-NEXT: .LBB5_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB5_4
-; RV32I-NEXT: .LBB5_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB5_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32_sext:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: srli a2, a2, 32
-; RV64I-NEXT: bltu a2, a1, .LBB5_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a1, a2, .LBB5_3
-; RV64I-NEXT: j .LBB5_4
-; RV64I-NEXT: .LBB5_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a1, a2, .LBB5_4
-; RV64I-NEXT: .LBB5_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB5_4:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -241,34 +132,16 @@ define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
define i32 @ucmp.32.32_zext(i32 zeroext %x, i32 zeroext %y) nounwind {
; RV32I-LABEL: ucmp.32.32_zext:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB6_3
-; RV32I-NEXT: j .LBB6_4
-; RV32I-NEXT: .LBB6_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB6_4
-; RV32I-NEXT: .LBB6_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB6_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32_zext:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB6_3
-; RV64I-NEXT: j .LBB6_4
-; RV64I-NEXT: .LBB6_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB6_4
-; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB6_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -280,42 +153,20 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: beq a1, a3, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a3, a1
-; RV32I-NEXT: bnez a4, .LBB7_3
-; RV32I-NEXT: j .LBB7_4
+; RV32I-NEXT: sltu a0, a1, a3
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: beqz a4, .LBB7_4
-; RV32I-NEXT: .LBB7_3:
-; RV32I-NEXT: li a4, 1
-; RV32I-NEXT: .LBB7_4:
-; RV32I-NEXT: beq a1, a3, .LBB7_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB7_7
-; RV32I-NEXT: j .LBB7_8
-; RV32I-NEXT: .LBB7_6:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB7_8
-; RV32I-NEXT: .LBB7_7:
-; RV32I-NEXT: li a4, -1
-; RV32I-NEXT: .LBB7_8:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: sub a0, a0, a4
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB7_3
-; RV64I-NEXT: j .LBB7_4
-; RV64I-NEXT: .LBB7_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB7_4
-; RV64I-NEXT: .LBB7_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB7_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
ret i32 %1
@@ -324,46 +175,25 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: ucmp.64.64:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a4, a0
; RV32I-NEXT: beq a1, a3, .LBB8_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a0, a3, a1
-; RV32I-NEXT: bnez a0, .LBB8_3
-; RV32I-NEXT: j .LBB8_4
+; RV32I-NEXT: sltu a4, a3, a1
+; RV32I-NEXT: sltu a1, a1, a3
+; RV32I-NEXT: j .LBB8_3
; RV32I-NEXT: .LBB8_2:
-; RV32I-NEXT: sltu a0, a2, a4
-; RV32I-NEXT: beqz a0, .LBB8_4
+; RV32I-NEXT: sltu a4, a2, a0
+; RV32I-NEXT: sltu a1, a0, a2
; RV32I-NEXT: .LBB8_3:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: .LBB8_4:
-; RV32I-NEXT: beq a1, a3, .LBB8_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: sltu a1, a1, a3
-; RV32I-NEXT: bnez a1, .LBB8_7
-; RV32I-NEXT: j .LBB8_8
-; RV32I-NEXT: .LBB8_6:
-; RV32I-NEXT: sltu a1, a4, a2
-; RV32I-NEXT: beqz a1, .LBB8_8
-; RV32I-NEXT: .LBB8_7:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: li a1, -1
-; RV32I-NEXT: .LBB8_8:
+; RV32I-NEXT: sub a0, a1, a4
+; RV32I-NEXT: sltu a1, a1, a4
+; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.64.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB8_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB8_3
-; RV64I-NEXT: j .LBB8_4
-; RV64I-NEXT: .LBB8_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB8_4
-; RV64I-NEXT: .LBB8_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB8_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
ret i64 %1
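
A minimal standalone C++ sketch (illustrative names, not LLVM code) of the expansion the test diffs above exercise: cmp(x, y) == (x > y) - (x < y), which is -1, 0, or 1 when the compares produce 0/1 booleans. The operand order of the final sub is essential; the output above still has the islt - isgt order, which the follow-up patch below flips.

#include <cassert>
#include <cstdint>

static int32_t ucmpExpanded(uint32_t X, uint32_t Y) {
  int32_t IsGT = X > Y; // sltu a2, a1, a0 in the corrected RV32I output
  int32_t IsLT = X < Y; // sltu a0, a0, a1
  return IsGT - IsLT;   // sub  a0, a2, a0
}

int main() {
  assert(ucmpExpanded(2, 1) == 1);  // greater -> 1
  assert(ucmpExpanded(1, 1) == 0);  // equal   -> 0
  assert(ucmpExpanded(1, 2) == -1); // less    -> -1
  return 0;
}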
From 002dabc7c0d3aff67cb2aad99c790119495b9520 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 9 Dec 2024 13:58:52 -0800
Subject: [PATCH 2/2] fixup! Add check for UndefinedBooleanContent. Add
missing compare.
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 6 ++-
.../legalizer/legalize-threeway-cmp-rv32.mir | 4 +-
.../legalizer/legalize-threeway-cmp-rv64.mir | 4 +-
llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll | 34 +++++++--------
llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll | 42 +++++++++----------
5 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1f414169db022c..f84af0b0caf13e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7938,15 +7938,17 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
const DataLayout &DL = MIRBuilder.getDataLayout();
+ auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
if (TLI.shouldExpandCmpUsingSelects(
- getApproximateEVTForLLT(SrcTy, DL, Ctx))) {
+ getApproximateEVTForLLT(SrcTy, DL, Ctx)) ||
+ BC == TargetLowering::UndefinedBooleanContent) {
auto One = MIRBuilder.buildConstant(DstTy, 1);
auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
} else {
- if (TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false))
+ if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
std::swap(IsGT, IsLT);
unsigned BoolExtOp =
MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
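
In prose, what this fixup changes: the first patch used getBooleanContents as if it returned a bool, so the IsGT/IsLT swap also fired for ZeroOrOne targets such as RISC-V and the sub came out with reversed operands. The fixup swaps only for ZeroOrNegativeOneBooleanContent, and routes UndefinedBooleanContent targets to the select-based expansion, since the extended compare values aren't well defined there. A hedged standalone sketch of the arithmetic (enum and function names are illustrative, not LLVM's):

#include <cassert>
#include <initializer_list>

enum BooleanContent { ZeroOrOne, ZeroOrNegativeOne };

static int cmpViaSub(int X, int Y, BooleanContent BC) {
  int IsGT = X > Y, IsLT = X < Y;
  if (BC == ZeroOrNegativeOne) {
    int GExt = -IsGT;   // boolean-extend: true -> all-ones (-1)
    int LExt = -IsLT;
    return LExt - GExt; // swapped operands: 0 - (-1) == 1 when X > Y
  }
  return IsGT - IsLT;   // 0/1 booleans: 1 - 0 == 1 when X > Y
}

int main() {
  for (BooleanContent BC : {ZeroOrOne, ZeroOrNegativeOne}) {
    assert(cmpViaSub(5, 3, BC) == 1);
    assert(cmpViaSub(3, 3, BC) == 0);
    assert(cmpViaSub(3, 5, BC) == -1);
  }
  return 0;
}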
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
index 91743a4a8d0f86..4ffca796a4c20b 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
@@ -14,7 +14,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP]], [[ICMP1]]
; CHECK-NEXT: $x10 = COPY [[SUB]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = COPY $x10
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP]], [[ICMP1]]
; CHECK-NEXT: $x10 = COPY [[SUB]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
index e26e89d2ffdab4..9e60a767c55feb 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
@@ -16,7 +16,7 @@ body: |
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32
; CHECK-NEXT: $x10 = COPY [[SEXT_INREG2]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
@@ -45,7 +45,7 @@ body: |
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP1]], [[ICMP]]
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32
; CHECK-NEXT: $x10 = COPY [[SEXT_INREG2]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
index c26e30721c8482..4346e04ecda667 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
@@ -7,14 +7,14 @@ define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: slt a2, a1, a0
; RV32I-NEXT: slt a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.8:
; RV64I: # %bb.0:
; RV64I-NEXT: slt a2, a1, a0
; RV64I-NEXT: slt a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -25,14 +25,14 @@ define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: slt a2, a1, a0
; RV32I-NEXT: slt a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.16:
; RV64I: # %bb.0:
; RV64I-NEXT: slt a2, a1, a0
; RV64I-NEXT: slt a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -43,7 +43,7 @@ define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: slt a2, a1, a0
; RV32I-NEXT: slt a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.32:
@@ -52,7 +52,7 @@ define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: slt a2, a1, a0
; RV64I-NEXT: slt a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -65,19 +65,19 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a3, a1
; RV32I-NEXT: slt a0, a1, a3
-; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.64:
; RV64I: # %bb.0:
; RV64I-NEXT: slt a2, a1, a0
; RV64I-NEXT: slt a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i64 %x, i64 %y)
ret i8 %1
@@ -88,7 +88,7 @@ define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: slt a2, a1, a0
; RV32I-NEXT: slt a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.32.32:
@@ -97,7 +97,7 @@ define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: slt a2, a1, a0
; RV64I-NEXT: slt a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
@@ -110,19 +110,19 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a3, a1
; RV32I-NEXT: slt a0, a1, a3
-; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB5_2:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.32.64:
; RV64I: # %bb.0:
; RV64I-NEXT: slt a2, a1, a0
; RV64I-NEXT: slt a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.scmp(i64 %x, i64 %y)
ret i32 %1
@@ -140,8 +140,8 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: sltu a1, a0, a2
; RV32I-NEXT: .LBB6_3:
-; RV32I-NEXT: sub a0, a1, a4
-; RV32I-NEXT: sltu a1, a1, a4
+; RV32I-NEXT: sub a0, a4, a1
+; RV32I-NEXT: sltu a1, a4, a1
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
@@ -149,7 +149,7 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: slt a2, a1, a0
; RV64I-NEXT: slt a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i64 @llvm.scmp(i64 %x, i64 %y)
ret i64 %1
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
index ff24aaaa90931d..c3abf51fd05bc8 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
@@ -7,14 +7,14 @@ define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a2, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.8:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -25,14 +25,14 @@ define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a2, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.16:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -43,7 +43,7 @@ define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a2, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.32:
@@ -54,7 +54,7 @@ define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -67,19 +67,19 @@ define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a3, a1
; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.64:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
ret i8 %1
@@ -90,7 +90,7 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a2, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32:
@@ -101,7 +101,7 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -112,7 +112,7 @@ define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a2, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32_sext:
@@ -123,7 +123,7 @@ define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -134,14 +134,14 @@ define i32 @ucmp.32.32_zext(i32 zeroext %x, i32 zeroext %y) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a2, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32_zext:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -154,19 +154,19 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a3, a1
; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.64:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
ret i32 %1
@@ -184,8 +184,8 @@ define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: sltu a1, a0, a2
; RV32I-NEXT: .LBB8_3:
-; RV32I-NEXT: sub a0, a1, a4
-; RV32I-NEXT: sltu a1, a1, a4
+; RV32I-NEXT: sub a0, a4, a1
+; RV32I-NEXT: sltu a1, a4, a1
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
@@ -193,7 +193,7 @@ define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: sltu a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
ret i64 %1