[llvm] [GISel][RISCV][AArch64] Support legalizing G_SCMP/G_UCMP to sub(isgt,islt). (PR #119265)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 12:35:55 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
Convert the source LLT to an EVT and call TargetLowering::shouldExpandCmpUsingSelects to decide whether to keep the existing select-based expansion or to lower G_SCMP/G_UCMP to sub(isgt, islt) instead.
We don't have a getSetccResultType, so I'm boolean-extending both compares to the result type and subtracting the extended values. If the compares legalize to that same type, the extends are removed. Unfortunately, if the compares legalize to a different type, we end up with truncates or extends that might not be optimally placed. I wonder if we can work around this by adding widening/narrowing rules for G_SCMP/G_UCMP before lowering?
---
Patch is 61.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119265.diff
8 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+20-5)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir (+12-41)
- (modified) llvm/test/CodeGen/AArch64/scmp.ll (+59-147)
- (modified) llvm/test/CodeGen/AArch64/ucmp.ll (+62-151)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir (+4-12)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir (+6-20)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll (+51-186)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll (+70-240)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cf835ad187f818..1f414169db022c 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7920,6 +7920,7 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
Register Dst = Cmp->getReg(0);
LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Cmp->getReg(1));
LLT CmpTy = DstTy.changeElementSize(1);
CmpInst::Predicate LTPredicate = Cmp->isSigned()
@@ -7929,16 +7930,30 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
? CmpInst::Predicate::ICMP_SGT
: CmpInst::Predicate::ICMP_UGT;
- auto One = MIRBuilder.buildConstant(DstTy, 1);
auto Zero = MIRBuilder.buildConstant(DstTy, 0);
auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
Cmp->getRHSReg());
- auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
-
- auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
Cmp->getRHSReg());
- MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
+
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if (TLI.shouldExpandCmpUsingSelects(
+ getApproximateEVTForLLT(SrcTy, DL, Ctx))) {
+ auto One = MIRBuilder.buildConstant(DstTy, 1);
+ auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
+
+ auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
+ MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
+ } else {
+ if (TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false))
+ std::swap(IsGT, IsLT);
+ unsigned BoolExtOp =
+ MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
+ IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
+ IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
+ MIRBuilder.buildSub(Dst, IsGT, IsLT);
+ }
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
index e69f79bdd187a3..27d17310c24e9f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
@@ -8,10 +8,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2
@@ -31,10 +31,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2
@@ -61,42 +61,13 @@ body: |
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ugt), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[UV]]
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV2]], [[TRUNC1]]
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV4]], [[XOR]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ult), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s16>), [[UV7:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>)
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC2]], [[UV6]]
- ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR6]](<8 x s8>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT4]](<8 x s16>)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV8]], [[TRUNC3]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[XOR1]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND2]], [[AND3]]
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR1]](<4 x s16>)
- ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT5]], 2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[SUB]](<4 x s16>)
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT]], 2
; CHECK-NEXT: $q0 = COPY [[SEXT_INREG]](<4 x s32>)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
@@ -125,15 +96,15 @@ body: |
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[ICMP5]], [[ICMP3]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[ICMP5]], [[ICMP3]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT2]](s32), [[C2]], [[SELECT1]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT1]](s32), [[C2]], [[SELECT2]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT3]], 2
; CHECK-NEXT: $w0 = COPY [[SEXT_INREG]](s32)
%0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/scmp.ll b/llvm/test/CodeGen/AArch64/scmp.ll
index 4aff5a836e1a18..9930dea4e37e2f 100644
--- a/llvm/test/CodeGen/AArch64/scmp.ll
+++ b/llvm/test/CodeGen/AArch64/scmp.ll
@@ -85,14 +85,13 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; CHECK-GI-NEXT: cset w9, hi
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
-; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: cset w8, ne
-; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w9, lt
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w10, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w9, w10, w9, eq
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: tst w9, #0x1
; CHECK-GI-NEXT: csinv w0, w8, wzr, eq
; CHECK-GI-NEXT: ret
@@ -134,88 +133,48 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
}
define <8 x i8> @s_v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-SD-LABEL: s_v8i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v8i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8b, #1
-; CHECK-GI-NEXT: cmgt v3.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.8b, v0.8b, v1.8b
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b
+; CHECK-NEXT: ret
entry:
%c = call <8 x i8> @llvm.scmp(<8 x i8> %a, <8 x i8> %b)
ret <8 x i8> %c
}
define <16 x i8> @s_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SD-LABEL: s_v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: cmgt v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.16b, #1
-; CHECK-GI-NEXT: cmgt v3.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%c = call <16 x i8> @llvm.scmp(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %c
}
define <4 x i16> @s_v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-SD-LABEL: s_v4i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT: cmgt v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v4i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4h, #1
-; CHECK-GI-NEXT: cmgt v3.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.4h, v0.4h, v1.4h
+; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h
+; CHECK-NEXT: ret
entry:
%c = call <4 x i16> @llvm.scmp(<4 x i16> %a, <4 x i16> %b)
ret <4 x i16> %c
}
define <8 x i16> @s_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: s_v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.8h, v0.8h, v1.8h
-; CHECK-SD-NEXT: cmgt v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.8h, #1
-; CHECK-GI-NEXT: cmgt v3.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.8h, v1.8h, v0.8h
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.8h, v0.8h, v1.8h
+; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
entry:
%c = call <8 x i16> @llvm.scmp(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %c
@@ -234,16 +193,12 @@ define <16 x i16> @s_v16i16(<16 x i16> %a, <16 x i16> %b) {
;
; CHECK-GI-LABEL: s_v16i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.8h, #1
-; CHECK-GI-NEXT: cmgt v5.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT: cmgt v6.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v4.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: cmgt v5.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: cmgt v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: cmgt v1.8h, v3.8h, v1.8h
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v5.8h
; CHECK-GI-NEXT: ret
entry:
%c = call <16 x i16> @llvm.scmp(<16 x i16> %a, <16 x i16> %b)
@@ -251,44 +206,24 @@ entry:
}
define <2 x i32> @s_v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-SD-LABEL: s_v2i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: cmgt v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v2i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.2s, #1
-; CHECK-GI-NEXT: cmgt v3.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b
-; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: ret
entry:
%c = call <2 x i32> @llvm.scmp(<2 x i32> %a, <2 x i32> %b)
ret <2 x i32> %c
}
define <4 x i32> @s_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: s_v4i32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v4i32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v2.4s, #1
-; CHECK-GI-NEXT: cmgt v3.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
entry:
%c = call <4 x i32> @llvm.scmp(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %c
@@ -307,16 +242,12 @@ define <8 x i32> @s_v8i32(<8 x i32> %a, <8 x i32> %b) {
;
; CHECK-GI-LABEL: s_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: movi v4.4s, #1
-; CHECK-GI-NEXT: cmgt v5.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: cmgt v6.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v4.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: cmgt v5.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: cmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: cmgt v1.4s, v3.4s, v1.4s
-; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b
-; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v5.4s
; CHECK-GI-NEXT: ret
entry:
%c = call <8 x i32> @llvm.scmp(<8 x i32> %a, <8 x i32> %b)
@@ -324,23 +255,12 @@ entry:
}
define <2 x i64> @s_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: s_v2i64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: cmgt v0.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: s_v2i64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI16_0
-; CHECK-GI-NEXT: cmgt v2.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
-; CHECK-GI-NEXT: cmgt v0.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: and v2.16b, v3.16b, v2.16b
-; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: s_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: sub v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: ret
entry:
%c = call <2 x i64> @llvm.scmp(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c
@@ -359,17 +279,12 @@ define <4 x i64> @s_v4i64(<4 x i64> %a, <4 x i64> %b) {
;
; CHECK-GI-LABEL: s_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI17_0
; CHECK-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-GI-NEXT: cmgt v6.2d, v1.2d, v3.2d
-; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI17_0]
-; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmgt v5.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: cmgt v0.2d, v2.2d, v0.2d
; CHECK-GI-NEXT: cmgt v1.2d, v3.2d, v1.2d
-; CHECK-GI-NEXT: and v4.16b, v5.16b, v4.16b
-; CHECK-GI-NEXT: and v5.16b, v5.16b, v6.16b
-; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v4.16b
-; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v5.16b
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v4.2d
+; CHECK-G...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/119265
More information about the llvm-commits mailing list