[llvm] de1a423 - [GISel][RISCV][AArch64] Support legalizing G_SCMP/G_UCMP to sub(isgt,islt). (#119265)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 15 20:47:21 PST 2024
Author: Craig Topper
Date: 2024-12-15T20:47:17-08:00
New Revision: de1a423c2356d2040cab74e657ed024bf9ce8517
URL: https://github.com/llvm/llvm-project/commit/de1a423c2356d2040cab74e657ed024bf9ce8517
DIFF: https://github.com/llvm/llvm-project/commit/de1a423c2356d2040cab74e657ed024bf9ce8517.diff
LOG: [GISel][RISCV][AArch64] Support legalizing G_SCMP/G_UCMP to sub(isgt,islt). (#119265)
Convert the LLT to an EVT and call
TargetLowering::shouldExpandCmpUsingSelects to determine whether we should
do this.
We don't have a getSetccResultType in GlobalISel, so I'm boolean-extending
the compares to the result type and using that. If the compares legalize
to the same type, these extends will get removed. Unfortunately, if the
compares legalize to a different type, we end up with truncates or
extends that might not be optimally placed.
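To make the transform concrete, here is a minimal standalone C++ sketch of
the two expansions (an illustration only, not the LLVM code; the function
names scmpSelects/scmpSub are hypothetical). A signed three-way compare
returns -1, 0, or 1, and with zero-or-one booleans the subtraction form
computes it directly:

  // Select-based expansion: select(islt, -1, select(isgt, 1, 0)).
  // Used when the target prefers selects or when its boolean contents
  // are undefined.
  int scmpSelects(int x, int y) {
    int r = (x > y) ? 1 : 0;
    return (x < y) ? -1 : r;
  }

  // Subtraction-based expansion added by this patch: isgt - islt yields
  // 1, 0, or -1 once the i1 compares are extended to the (at least i2)
  // result type.
  int scmpSub(int x, int y) {
    int isGT = x > y;
    int isLT = x < y;
    return isGT - isLT;
  }

For targets whose booleans are zero-or-negative-one, the sign-extended
compares are 0/-1, so the patch swaps the operands; isLT - isGT on those
values produces the same -1/0/1 result.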
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
llvm/test/CodeGen/AArch64/scmp.ll
llvm/test/CodeGen/AArch64/ucmp.ll
llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 2fc8ef6a52a528..c0f52e9b5cb16e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7950,6 +7950,7 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
Register Dst = Cmp->getReg(0);
LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Cmp->getReg(1));
LLT CmpTy = DstTy.changeElementSize(1);
CmpInst::Predicate LTPredicate = Cmp->isSigned()
@@ -7959,16 +7960,32 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
? CmpInst::Predicate::ICMP_SGT
: CmpInst::Predicate::ICMP_UGT;
- auto One = MIRBuilder.buildConstant(DstTy, 1);
auto Zero = MIRBuilder.buildConstant(DstTy, 0);
auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
Cmp->getRHSReg());
- auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
-
- auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
Cmp->getRHSReg());
- MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
+
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
+ if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) ||
+ BC == TargetLowering::UndefinedBooleanContent) {
+ auto One = MIRBuilder.buildConstant(DstTy, 1);
+ auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
+
+ auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
+ MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
+ } else {
+ if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
+ std::swap(IsGT, IsLT);
+ // Extend boolean results to DstTy, which is at least i2, before subtracting
+ // them.
+ unsigned BoolExtOp =
+ MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
+ IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
+ IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
+ MIRBuilder.buildSub(Dst, IsGT, IsLT);
+ }
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
index 18c4f3c31efa53..ae16e40671785f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
@@ -8,10 +8,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2
@@ -31,10 +31,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2
@@ -61,42 +61,13 @@ body: |
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ugt), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[UV]]
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV2]], [[TRUNC1]]
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV4]], [[XOR]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ult), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s16>), [[UV7:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>)
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC2]], [[UV6]]
- ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR6]](<8 x s8>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT4]](<8 x s16>)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV8]], [[TRUNC3]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[XOR1]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND2]], [[AND3]]
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR1]](<4 x s16>)
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT5]], 2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[SUB]](<4 x s16>)
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT]], 2
; CHECK-NEXT: $q0 = COPY [[SEXT_INREG]](<4 x s32>)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
@@ -125,15 +96,15 @@ body: |
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), [[ICMP3]], [[ICMP4]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), [[ICMP3]], [[ICMP4]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT2]](s32), [[C2]], [[SELECT1]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT1]](s32), [[C2]], [[SELECT2]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT3]], 2
; CHECK-NEXT: $w0 = COPY [[SEXT_INREG]](s32)
%0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/scmp.ll b/llvm/test/CodeGen/AArch64/scmp.ll
index be167fde7dbe6d..7a73578f43e80c 100644
--- a/llvm/test/CodeGen/AArch64/scmp.ll
+++ b/llvm/test/CodeGen/AArch64/scmp.ll
@@ -84,13 +84,13 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w9, gt
; CHECK-GI-NEXT: csel w8, w8, w9, eq
-; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w9, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w10, lt
; CHECK-GI-NEXT: csel w9, w9, w10, eq
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: tst w9, #0x1
; CHECK-GI-NEXT: csinv w0, w8, wzr, eq
; CHECK-GI-NEXT: ret
@@ -132,88 +132,48 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
}
define <8 x i8> @s_v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-SD-LABEL: s_v8i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v8i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8b, #1
-; CHECK-GI-NEXT: cmgt v3.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.8b, v0.8b, v1.8b
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%c = call <8 x i8> @llvm.scmp(<8 x i8> %a, <8 x i8> %b)
ret <8 x i8> %c
}
define <16 x i8> @s_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SD-LABEL: s_v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: cmgt v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: cmgt v3.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%c = call <16 x i8> @llvm.scmp(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %c
}
define <4 x i16> @s_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-SD-LABEL: s_v4i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT: cmgt v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v4i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4h, #1
-; CHECK-GI-NEXT: cmgt v3.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.4h, v0.4h, v1.4h
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%c = call <4 x i16> @llvm.scmp(<4 x i16> %a, <4 x i16> %b)
ret <4 x i16> %c
}
define <8 x i16> @s_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: s_v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.8h, v0.8h, v1.8h
-; CHECK-SD-NEXT: cmgt v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8h, #1
-; CHECK-GI-NEXT: cmgt v3.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.8h, v1.8h, v0.8h
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.8h, v0.8h, v1.8h
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%c = call <8 x i16> @llvm.scmp(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %c
@@ -232,16 +192,12 @@ define <16 x i16> @s_v16i16(<16 x i16> %a, <16 x i16> %b) {
;
; CHECK-GI-LABEL: s_v16i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.8h, #1
-; CHECK-GI-NEXT: cmgt v5.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT: cmgt v6.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v4.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: cmgt v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmgt v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmgt v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v5.8h
; CHECK-GI-NEXT: ret
entry:
%c = call <16 x i16> @llvm.scmp(<16 x i16> %a, <16 x i16> %b)
@@ -249,44 +205,24 @@ entry:
}
define <2 x i32> @s_v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-SD-LABEL: s_v2i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: cmgt v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v2i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.2s, #1
-; CHECK-GI-NEXT: cmgt v3.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%c = call <2 x i32> @llvm.scmp(<2 x i32> %a, <2 x i32> %b)
ret <2 x i32> %c
}
define <4 x i32> @s_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: s_v4i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v4i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4s, #1
-; CHECK-GI-NEXT: cmgt v3.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%c = call <4 x i32> @llvm.scmp(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %c
@@ -305,16 +241,12 @@ define <8 x i32> @s_v8i32(<8 x i32> %a, <8 x i32> %b) {
;
; CHECK-GI-LABEL: s_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.4s, #1
-; CHECK-GI-NEXT: cmgt v5.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: cmgt v6.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v4.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: cmgt v5.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: cmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: cmgt v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v5.4s
; CHECK-GI-NEXT: ret
entry:
%c = call <8 x i32> @llvm.scmp(<8 x i32> %a, <8 x i32> %b)
@@ -322,23 +254,12 @@ entry:
}
define <2 x i64> @s_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: s_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: cmgt v0.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v2i64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI16_0
-; CHECK-GI-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
-; CHECK-GI-NEXT: cmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: and v2.16b, v3.16b, v2.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: sub v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%c = call <2 x i64> @llvm.scmp(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
@@ -357,17 +278,12 @@ define <4 x i64> @s_v4i64(<4 x i64> %a, <4 x i64> %b) {
;
; CHECK-GI-LABEL: s_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI17_0
; CHECK-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-GI-NEXT: cmgt v6.2d, v1.2d, v3.2d
-; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI17_0]
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v5.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: cmgt v0.2d, v2.2d, v0.2d
; CHECK-GI-NEXT: cmgt v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: and v4.16b, v5.16b, v4.16b
-; CHECK-GI-NEXT: and v5.16b, v5.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v4.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v5.16b
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: sub v1.2d, v1.2d, v5.2d
; CHECK-GI-NEXT: ret
entry:
%c = call <4 x i64> @llvm.scmp(<4 x i64> %a, <4 x i64> %b)
@@ -392,16 +308,13 @@ define <16 x i8> @signOf_neon_scmp(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16>
; CHECK-GI-NEXT: cmgt v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmgt v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmgt v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: movi v3.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: uzp1 v4.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: shl v1.16b, v4.16b, #7
+; CHECK-GI-NEXT: uzp1 v1.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7
; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-GI-NEXT: bsl v0.16b, v3.16b, v1.16b
+; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%0 = shufflevector <8 x i16> %s0_lo, <8 x i16> %s0_hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/ucmp.ll b/llvm/test/CodeGen/AArch64/ucmp.ll
index 0e4da89fcaebc5..ad46e4abc477ce 100644
--- a/llvm/test/CodeGen/AArch64/ucmp.ll
+++ b/llvm/test/CodeGen/AArch64/ucmp.ll
@@ -84,13 +84,13 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w9, hi
; CHECK-GI-NEXT: csel w8, w8, w9, eq
-; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w9, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w10, lo
; CHECK-GI-NEXT: csel w9, w9, w10, eq
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: tst w9, #0x1
; CHECK-GI-NEXT: csinv w0, w8, wzr, eq
; CHECK-GI-NEXT: ret
@@ -154,15 +154,15 @@ define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) {
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: cset w11, hi
; CHECK-GI-NEXT: csel w10, w10, w11, eq
-; CHECK-GI-NEXT: tst w10, #0x1
-; CHECK-GI-NEXT: cset x10, ne
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w11, lo
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: cset w8, lo
; CHECK-GI-NEXT: csel w8, w11, w8, eq
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: cset x9, ne
; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: csinv x8, x10, xzr, eq
+; CHECK-GI-NEXT: csinv x8, x9, xzr, eq
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%1 = call <1 x i64> @llvm.ucmp(<1 x i65> %x, <1 x i65> %y)
@@ -170,88 +170,48 @@ define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) {
}
define <8 x i8> @u_v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-SD-LABEL: u_v8i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: cmhi v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v8i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8b, #1
-; CHECK-GI-NEXT: cmhi v3.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.8b, v0.8b, v1.8b
+; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%c = call <8 x i8> @llvm.ucmp(<8 x i8> %a, <8 x i8> %b)
ret <8 x i8> %c
}
define <16 x i8> @u_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SD-LABEL: u_v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: cmhi v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: cmhi v3.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%c = call <16 x i8> @llvm.ucmp(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %c
}
define <4 x i16> @u_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-SD-LABEL: u_v4i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT: cmhi v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v4i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4h, #1
-; CHECK-GI-NEXT: cmhi v3.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.4h, v0.4h, v1.4h
+; CHECK-NEXT: cmhi v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%c = call <4 x i16> @llvm.ucmp(<4 x i16> %a, <4 x i16> %b)
ret <4 x i16> %c
}
define <8 x i16> @u_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: u_v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.8h, v0.8h, v1.8h
-; CHECK-SD-NEXT: cmhi v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8h, #1
-; CHECK-GI-NEXT: cmhi v3.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.8h, v1.8h, v0.8h
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.8h, v0.8h, v1.8h
+; CHECK-NEXT: cmhi v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%c = call <8 x i16> @llvm.ucmp(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %c
@@ -270,16 +230,12 @@ define <16 x i16> @u_v16i16(<16 x i16> %a, <16 x i16> %b) {
;
; CHECK-GI-LABEL: u_v16i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.8h, #1
-; CHECK-GI-NEXT: cmhi v5.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT: cmhi v6.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmhi v4.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: cmhi v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmhi v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmhi v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v5.8h
; CHECK-GI-NEXT: ret
entry:
%c = call <16 x i16> @llvm.ucmp(<16 x i16> %a, <16 x i16> %b)
@@ -287,44 +243,24 @@ entry:
}
define <2 x i32> @u_v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-SD-LABEL: u_v2i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: cmhi v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v2i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.2s, #1
-; CHECK-GI-NEXT: cmhi v3.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: cmhi v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%c = call <2 x i32> @llvm.ucmp(<2 x i32> %a, <2 x i32> %b)
ret <2 x i32> %c
}
define <4 x i32> @u_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: u_v4i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v4i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4s, #1
-; CHECK-GI-NEXT: cmhi v3.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%c = call <4 x i32> @llvm.ucmp(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %c
@@ -343,16 +279,12 @@ define <8 x i32> @u_v8i32(<8 x i32> %a, <8 x i32> %b) {
;
; CHECK-GI-LABEL: u_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.4s, #1
-; CHECK-GI-NEXT: cmhi v5.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: cmhi v6.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmhi v4.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: cmhi v5.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: cmhi v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: cmhi v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v5.4s
; CHECK-GI-NEXT: ret
entry:
%c = call <8 x i32> @llvm.ucmp(<8 x i32> %a, <8 x i32> %b)
@@ -360,23 +292,12 @@ entry:
}
define <2 x i64> @u_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: u_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: cmhi v0.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: u_v2i64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI17_0
-; CHECK-GI-NEXT: cmhi v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI17_0]
-; CHECK-GI-NEXT: cmhi v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: and v2.16b, v3.16b, v2.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: u_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: sub v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%c = call <2 x i64> @llvm.ucmp(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
@@ -395,17 +316,12 @@ define <4 x i64> @u_v4i64(<4 x i64> %a, <4 x i64> %b) {
;
; CHECK-GI-LABEL: u_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI18_0
; CHECK-GI-NEXT: cmhi v4.2d, v0.2d, v2.2d
-; CHECK-GI-NEXT: cmhi v6.2d, v1.2d, v3.2d
-; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI18_0]
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmhi v5.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: cmhi v0.2d, v2.2d, v0.2d
; CHECK-GI-NEXT: cmhi v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: and v4.16b, v5.16b, v4.16b
-; CHECK-GI-NEXT: and v5.16b, v5.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v4.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v5.16b
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: sub v1.2d, v1.2d, v5.2d
; CHECK-GI-NEXT: ret
entry:
%c = call <4 x i64> @llvm.ucmp(<4 x i64> %a, <4 x i64> %b)
@@ -430,16 +346,13 @@ define <16 x i8> @signOf_neon(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> %s1_
; CHECK-GI-NEXT: cmhi v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmhi v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmhi v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: movi v3.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: uzp1 v4.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: shl v1.16b, v4.16b, #7
+; CHECK-GI-NEXT: uzp1 v1.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7
; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-GI-NEXT: bsl v0.16b, v3.16b, v1.16b
+; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%0 = shufflevector <8 x i16> %s0_lo, <8 x i16> %s0_hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
index 5c3d7e5975f1fd..4ffca796a4c20b 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir
@@ -12,14 +12,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
- ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP]], [[ICMP1]]
+ ; CHECK-NEXT: $x10 = COPY [[SUB]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = COPY $x10
%1:_(s32) = COPY $x11
@@ -40,14 +36,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
- ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP]], [[ICMP1]]
+ ; CHECK-NEXT: $x10 = COPY [[SUB]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = COPY $x10
%1:_(s32) = COPY $x11
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
index ccade88ffae7c8..9e60a767c55feb 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir
@@ -12,20 +12,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C1]](s64)
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s64), [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s64), [[TRUNC2]], [[SELECT]]
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SELECT1]](s32)
- ; CHECK-NEXT: $x10 = COPY [[SEXT]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP]], [[ICMP1]]
+ ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32
+ ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG2]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%2:_(s64) = COPY $x10
%0:_(s32) = G_TRUNC %2(s64)
@@ -48,20 +41,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C1]](s64)
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s64), [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s64), [[TRUNC2]], [[SELECT]]
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SELECT1]](s32)
- ; CHECK-NEXT: $x10 = COPY [[SEXT]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP]], [[ICMP1]]
+ ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32
+ ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG2]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%2:_(s64) = COPY $x10
%0:_(s32) = G_TRUNC %2(s64)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
index 0f2b6281b6f88c..4346e04ecda667 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll
@@ -5,34 +5,16 @@
define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
; RV32I-LABEL: scmp.8.8:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: blt a1, a0, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: blt a2, a1, .LBB0_3
-; RV32I-NEXT: j .LBB0_4
-; RV32I-NEXT: .LBB0_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bge a2, a1, .LBB0_4
-; RV32I-NEXT: .LBB0_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB0_4:
+; RV32I-NEXT: slt a2, a1, a0
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.8:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB0_3
-; RV64I-NEXT: j .LBB0_4
-; RV64I-NEXT: .LBB0_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB0_4
-; RV64I-NEXT: .LBB0_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB0_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -41,34 +23,16 @@ define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
; RV32I-LABEL: scmp.8.16:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: blt a1, a0, .LBB1_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: blt a2, a1, .LBB1_3
-; RV32I-NEXT: j .LBB1_4
-; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bge a2, a1, .LBB1_4
-; RV32I-NEXT: .LBB1_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB1_4:
+; RV32I-NEXT: slt a2, a1, a0
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.16:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB1_3
-; RV64I-NEXT: j .LBB1_4
-; RV64I-NEXT: .LBB1_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB1_4
-; RV64I-NEXT: .LBB1_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB1_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -77,35 +41,18 @@ define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: scmp.8.32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: blt a1, a0, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: blt a2, a1, .LBB2_3
-; RV32I-NEXT: j .LBB2_4
-; RV32I-NEXT: .LBB2_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bge a2, a1, .LBB2_4
-; RV32I-NEXT: .LBB2_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB2_4:
+; RV32I-NEXT: slt a2, a1, a0
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: blt a1, a2, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB2_3
-; RV64I-NEXT: j .LBB2_4
-; RV64I-NEXT: .LBB2_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB2_4
-; RV64I-NEXT: .LBB2_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB2_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -117,42 +64,20 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: beq a1, a3, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a3, a1
-; RV32I-NEXT: bnez a4, .LBB3_3
-; RV32I-NEXT: j .LBB3_4
+; RV32I-NEXT: slt a0, a1, a3
+; RV32I-NEXT: sub a0, a4, a0
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: beqz a4, .LBB3_4
-; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: li a4, 1
-; RV32I-NEXT: .LBB3_4:
-; RV32I-NEXT: beq a1, a3, .LBB3_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: slt a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB3_7
-; RV32I-NEXT: j .LBB3_8
-; RV32I-NEXT: .LBB3_6:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB3_8
-; RV32I-NEXT: .LBB3_7:
-; RV32I-NEXT: li a4, -1
-; RV32I-NEXT: .LBB3_8:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.8.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB3_3
-; RV64I-NEXT: j .LBB3_4
-; RV64I-NEXT: .LBB3_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB3_4
-; RV64I-NEXT: .LBB3_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB3_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.scmp(i64 %x, i64 %y)
ret i8 %1
@@ -161,35 +86,18 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: scmp.32.32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: blt a1, a0, .LBB4_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: blt a2, a1, .LBB4_3
-; RV32I-NEXT: j .LBB4_4
-; RV32I-NEXT: .LBB4_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bge a2, a1, .LBB4_4
-; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB4_4:
+; RV32I-NEXT: slt a2, a1, a0
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.32.32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: blt a1, a2, .LBB4_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB4_3
-; RV64I-NEXT: j .LBB4_4
-; RV64I-NEXT: .LBB4_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB4_4
-; RV64I-NEXT: .LBB4_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB4_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
@@ -201,42 +109,20 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: beq a1, a3, .LBB5_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a3, a1
-; RV32I-NEXT: bnez a4, .LBB5_3
-; RV32I-NEXT: j .LBB5_4
+; RV32I-NEXT: slt a0, a1, a3
+; RV32I-NEXT: sub a0, a4, a0
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB5_2:
; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: beqz a4, .LBB5_4
-; RV32I-NEXT: .LBB5_3:
-; RV32I-NEXT: li a4, 1
-; RV32I-NEXT: .LBB5_4:
-; RV32I-NEXT: beq a1, a3, .LBB5_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: slt a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB5_7
-; RV32I-NEXT: j .LBB5_8
-; RV32I-NEXT: .LBB5_6:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB5_8
-; RV32I-NEXT: .LBB5_7:
-; RV32I-NEXT: li a4, -1
-; RV32I-NEXT: .LBB5_8:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.32.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB5_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB5_3
-; RV64I-NEXT: j .LBB5_4
-; RV64I-NEXT: .LBB5_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB5_4
-; RV64I-NEXT: .LBB5_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB5_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.scmp(i64 %x, i64 %y)
ret i32 %1
@@ -245,46 +131,25 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: scmp.64.64:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a4, a0
; RV32I-NEXT: beq a1, a3, .LBB6_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slt a0, a3, a1
-; RV32I-NEXT: bnez a0, .LBB6_3
-; RV32I-NEXT: j .LBB6_4
+; RV32I-NEXT: slt a4, a3, a1
+; RV32I-NEXT: slt a1, a1, a3
+; RV32I-NEXT: j .LBB6_3
; RV32I-NEXT: .LBB6_2:
-; RV32I-NEXT: sltu a0, a2, a4
-; RV32I-NEXT: beqz a0, .LBB6_4
+; RV32I-NEXT: sltu a4, a2, a0
+; RV32I-NEXT: sltu a1, a0, a2
; RV32I-NEXT: .LBB6_3:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: .LBB6_4:
-; RV32I-NEXT: beq a1, a3, .LBB6_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: slt a1, a1, a3
-; RV32I-NEXT: bnez a1, .LBB6_7
-; RV32I-NEXT: j .LBB6_8
-; RV32I-NEXT: .LBB6_6:
-; RV32I-NEXT: sltu a1, a4, a2
-; RV32I-NEXT: beqz a1, .LBB6_8
-; RV32I-NEXT: .LBB6_7:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: li a1, -1
-; RV32I-NEXT: .LBB6_8:
+; RV32I-NEXT: sub a0, a4, a1
+; RV32I-NEXT: sltu a1, a4, a1
+; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: scmp.64.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: blt a1, a0, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: blt a2, a1, .LBB6_3
-; RV64I-NEXT: j .LBB6_4
-; RV64I-NEXT: .LBB6_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bge a2, a1, .LBB6_4
-; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB6_4:
+; RV64I-NEXT: slt a2, a1, a0
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i64 @llvm.scmp(i64 %x, i64 %y)
ret i64 %1
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
index e2a95eb974342d..c3abf51fd05bc8 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll
@@ -5,34 +5,16 @@
define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
; RV32I-LABEL: ucmp.8.8:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB0_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB0_3
-; RV32I-NEXT: j .LBB0_4
-; RV32I-NEXT: .LBB0_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB0_4
-; RV32I-NEXT: .LBB0_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB0_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.8:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB0_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB0_3
-; RV64I-NEXT: j .LBB0_4
-; RV64I-NEXT: .LBB0_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB0_4
-; RV64I-NEXT: .LBB0_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB0_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -41,34 +23,16 @@ define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
; RV32I-LABEL: ucmp.8.16:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB1_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB1_3
-; RV32I-NEXT: j .LBB1_4
-; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB1_4
-; RV32I-NEXT: .LBB1_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB1_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.16:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB1_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB1_3
-; RV64I-NEXT: j .LBB1_4
-; RV64I-NEXT: .LBB1_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB1_4
-; RV64I-NEXT: .LBB1_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB1_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -77,37 +41,20 @@ define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: ucmp.8.32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB2_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB2_3
-; RV32I-NEXT: j .LBB2_4
-; RV32I-NEXT: .LBB2_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB2_4
-; RV32I-NEXT: .LBB2_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB2_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: srli a2, a2, 32
-; RV64I-NEXT: bltu a2, a1, .LBB2_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a1, a2, .LBB2_3
-; RV64I-NEXT: j .LBB2_4
-; RV64I-NEXT: .LBB2_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a1, a2, .LBB2_4
-; RV64I-NEXT: .LBB2_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB2_4:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -119,42 +66,20 @@ define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: beq a1, a3, .LBB3_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a3, a1
-; RV32I-NEXT: bnez a4, .LBB3_3
-; RV32I-NEXT: j .LBB3_4
+; RV32I-NEXT: sltu a0, a1, a3
+; RV32I-NEXT: sub a0, a4, a0
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: beqz a4, .LBB3_4
-; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: li a4, 1
-; RV32I-NEXT: .LBB3_4:
-; RV32I-NEXT: beq a1, a3, .LBB3_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB3_7
-; RV32I-NEXT: j .LBB3_8
-; RV32I-NEXT: .LBB3_6:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB3_8
-; RV32I-NEXT: .LBB3_7:
-; RV32I-NEXT: li a4, -1
-; RV32I-NEXT: .LBB3_8:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.8.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB3_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB3_3
-; RV64I-NEXT: j .LBB3_4
-; RV64I-NEXT: .LBB3_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB3_4
-; RV64I-NEXT: .LBB3_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB3_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
ret i8 %1
@@ -163,37 +88,20 @@ define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
; RV32I-LABEL: ucmp.32.32:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB4_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB4_3
-; RV32I-NEXT: j .LBB4_4
-; RV32I-NEXT: .LBB4_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB4_4
-; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB4_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: srli a2, a2, 32
-; RV64I-NEXT: bltu a2, a1, .LBB4_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a1, a2, .LBB4_3
-; RV64I-NEXT: j .LBB4_4
-; RV64I-NEXT: .LBB4_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a1, a2, .LBB4_4
-; RV64I-NEXT: .LBB4_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB4_4:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -202,37 +110,20 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
; RV32I-LABEL: ucmp.32.32_sext:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB5_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB5_3
-; RV32I-NEXT: j .LBB5_4
-; RV32I-NEXT: .LBB5_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB5_4
-; RV32I-NEXT: .LBB5_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB5_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32_sext:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: srli a2, a2, 32
-; RV64I-NEXT: bltu a2, a1, .LBB5_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a1, a2, .LBB5_3
-; RV64I-NEXT: j .LBB5_4
-; RV64I-NEXT: .LBB5_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a1, a2, .LBB5_4
-; RV64I-NEXT: .LBB5_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB5_4:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -241,34 +132,16 @@ define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
define i32 @ucmp.32.32_zext(i32 zeroext %x, i32 zeroext %y) nounwind {
; RV32I-LABEL: ucmp.32.32_zext:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a2, a0
-; RV32I-NEXT: bltu a1, a0, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 0
-; RV32I-NEXT: bltu a2, a1, .LBB6_3
-; RV32I-NEXT: j .LBB6_4
-; RV32I-NEXT: .LBB6_2:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: bgeu a2, a1, .LBB6_4
-; RV32I-NEXT: .LBB6_3:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: .LBB6_4:
+; RV32I-NEXT: sltu a2, a1, a0
+; RV32I-NEXT: sltu a0, a0, a1
+; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.32_zext:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB6_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB6_3
-; RV64I-NEXT: j .LBB6_4
-; RV64I-NEXT: .LBB6_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB6_4
-; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB6_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -280,42 +153,20 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
; RV32I-NEXT: beq a1, a3, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a3, a1
-; RV32I-NEXT: bnez a4, .LBB7_3
-; RV32I-NEXT: j .LBB7_4
+; RV32I-NEXT: sltu a0, a1, a3
+; RV32I-NEXT: sub a0, a4, a0
+; RV32I-NEXT: ret
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: beqz a4, .LBB7_4
-; RV32I-NEXT: .LBB7_3:
-; RV32I-NEXT: li a4, 1
-; RV32I-NEXT: .LBB7_4:
-; RV32I-NEXT: beq a1, a3, .LBB7_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB7_7
-; RV32I-NEXT: j .LBB7_8
-; RV32I-NEXT: .LBB7_6:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB7_8
-; RV32I-NEXT: .LBB7_7:
-; RV32I-NEXT: li a4, -1
-; RV32I-NEXT: .LBB7_8:
-; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: sub a0, a4, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.32.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB7_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB7_3
-; RV64I-NEXT: j .LBB7_4
-; RV64I-NEXT: .LBB7_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB7_4
-; RV64I-NEXT: .LBB7_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB7_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
ret i32 %1
@@ -324,46 +175,25 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: ucmp.64.64:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a4, a0
; RV32I-NEXT: beq a1, a3, .LBB8_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a0, a3, a1
-; RV32I-NEXT: bnez a0, .LBB8_3
-; RV32I-NEXT: j .LBB8_4
+; RV32I-NEXT: sltu a4, a3, a1
+; RV32I-NEXT: sltu a1, a1, a3
+; RV32I-NEXT: j .LBB8_3
; RV32I-NEXT: .LBB8_2:
-; RV32I-NEXT: sltu a0, a2, a4
-; RV32I-NEXT: beqz a0, .LBB8_4
+; RV32I-NEXT: sltu a4, a2, a0
+; RV32I-NEXT: sltu a1, a0, a2
; RV32I-NEXT: .LBB8_3:
-; RV32I-NEXT: li a0, 1
-; RV32I-NEXT: .LBB8_4:
-; RV32I-NEXT: beq a1, a3, .LBB8_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: sltu a1, a1, a3
-; RV32I-NEXT: bnez a1, .LBB8_7
-; RV32I-NEXT: j .LBB8_8
-; RV32I-NEXT: .LBB8_6:
-; RV32I-NEXT: sltu a1, a4, a2
-; RV32I-NEXT: beqz a1, .LBB8_8
-; RV32I-NEXT: .LBB8_7:
-; RV32I-NEXT: li a0, -1
-; RV32I-NEXT: li a1, -1
-; RV32I-NEXT: .LBB8_8:
+; RV32I-NEXT: sub a0, a4, a1
+; RV32I-NEXT: sltu a1, a4, a1
+; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: ucmp.64.64:
; RV64I: # %bb.0:
-; RV64I-NEXT: mv a2, a0
-; RV64I-NEXT: bltu a1, a0, .LBB8_2
-; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: bltu a2, a1, .LBB8_3
-; RV64I-NEXT: j .LBB8_4
-; RV64I-NEXT: .LBB8_2:
-; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: bgeu a2, a1, .LBB8_4
-; RV64I-NEXT: .LBB8_3:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: .LBB8_4:
+; RV64I-NEXT: sltu a2, a1, a0
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
ret i64 %1