[llvm] 43a0016 - Extend `performANDCSELCombine` to `performANDORCSELCombine`
Karl Meakin via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 4 07:11:00 PST 2022
Author: Karl Meakin
Date: 2022-03-04T15:09:59Z
New Revision: 43a0016f3dcff9d3932fb8d97e99460333ba7107
URL: https://github.com/llvm/llvm-project/commit/43a0016f3dcff9d3932fb8d97e99460333ba7107
DIFF: https://github.com/llvm/llvm-project/commit/43a0016f3dcff9d3932fb8d97e99460333ba7107.diff
LOG: Extend `performANDCSELCombine` to `performANDORCSELCombine`
Differential Revision: https://reviews.llvm.org/D120422
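This generalises the existing AND-only CSEL combine so that an OR of CSELs is
folded into a conditional compare as well: chains of comparisons joined by
`||` now lower to cmp/ccmp/cset sequences instead of materialising each
predicate with cset and combining the results with orr (see the updated and
added tests below).

As a rough source-level illustration (hypothetical C++, not part of the patch;
the function names are invented), the OR form now gets the same ccmp lowering
that the AND form already had, mirroring cmp_and2/cmp_or2 in the new
cmp-chains.ll test:

    // AND form (already combined before this patch); expected to lower
    // roughly to: cmp w0, w1 ; ccmp w2, w3, #0, lo ; cset w0, hi
    bool both(unsigned a, unsigned b, unsigned c, unsigned d) {
      return (a < b) && (c > d);
    }

    // OR form (newly combined by this patch); expected to lower
    // roughly to: cmp w0, w1 ; ccmp w2, w3, #0, hs ; cset w0, ne
    bool either(unsigned a, unsigned b, unsigned c, unsigned d) {
      return (a < b) || (c != d);
    }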
Added:
llvm/test/CodeGen/AArch64/cmp-chains.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/arm64-ccmp.ll
llvm/test/CodeGen/AArch64/arm64-fp128.ll
llvm/test/CodeGen/AArch64/select-with-and-or.ll
llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
llvm/test/CodeGen/AArch64/vec_umulo.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 465944dfdb0f9..d6a87526c5c38 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14034,15 +14034,85 @@ static SDValue tryCombineToBSL(SDNode *N,
return SDValue();
}
+// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
+// convert to csel(ccmp(.., cc0)), depending on cc1:
+
+// (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
+// =>
+// (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0))
+//
+// (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
+// =>
+// (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0))
+static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue CSel0 = N->getOperand(0);
+ SDValue CSel1 = N->getOperand(1);
+
+ if (CSel0.getOpcode() != AArch64ISD::CSEL ||
+ CSel1.getOpcode() != AArch64ISD::CSEL)
+ return SDValue();
+
+ if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
+ return SDValue();
+
+ if (!isNullConstant(CSel0.getOperand(0)) ||
+ !isOneConstant(CSel0.getOperand(1)) ||
+ !isNullConstant(CSel1.getOperand(0)) ||
+ !isOneConstant(CSel1.getOperand(1)))
+ return SDValue();
+
+ SDValue Cmp0 = CSel0.getOperand(3);
+ SDValue Cmp1 = CSel1.getOperand(3);
+ AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
+ AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
+ if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
+ return SDValue();
+ if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
+ Cmp0.getOpcode() == AArch64ISD::SUBS) {
+ std::swap(Cmp0, Cmp1);
+ std::swap(CC0, CC1);
+ }
+
+ if (Cmp1.getOpcode() != AArch64ISD::SUBS)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue CCmp;
+
+ if (N->getOpcode() == ISD::AND) {
+ AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
+ SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
+ SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
+ CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
+ Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
+ } else {
+ SDLoc DL(N);
+ AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
+ SDValue Condition = DAG.getConstant(CC0, DL, MVT_CC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
+ SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
+ CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
+ Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
+ }
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
+ CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
+ CCmp);
+}
+
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
- // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
+ if (SDValue R = performANDORCSELCombine(N, DAG))
+ return R;
+
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
+ // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
if (SDValue Res = tryCombineToEXTR(N, DCI))
return Res;
@@ -14171,60 +14241,13 @@ static SDValue performSVEAndCombine(SDNode *N,
return SDValue();
}
-// Given a tree of and(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
-// convert to csel(ccmp(.., cc0)), depending on cc1.
-static SDValue PerformANDCSELCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- SDValue CSel0 = N->getOperand(0);
- SDValue CSel1 = N->getOperand(1);
-
- if (CSel0.getOpcode() != AArch64ISD::CSEL ||
- CSel1.getOpcode() != AArch64ISD::CSEL)
- return SDValue();
-
- if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
- return SDValue();
-
- if (!isNullConstant(CSel0.getOperand(0)) ||
- !isOneConstant(CSel0.getOperand(1)) ||
- !isNullConstant(CSel1.getOperand(0)) ||
- !isOneConstant(CSel1.getOperand(1)))
- return SDValue();
-
- SDValue Cmp0 = CSel0.getOperand(3);
- SDValue Cmp1 = CSel1.getOperand(3);
- AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
- AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
- if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
- return SDValue();
- if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
- Cmp0.getOpcode() == AArch64ISD::SUBS) {
- std::swap(Cmp0, Cmp1);
- std::swap(CC0, CC1);
- }
-
- if (Cmp1.getOpcode() != AArch64ISD::SUBS)
- return SDValue();
-
- SDLoc DL(N);
- AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
- SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
- unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
- SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
- SDValue CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
- Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
- return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
- CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
- CCmp);
-}
-
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (SDValue R = PerformANDCSELCombine(N, DAG))
+ if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index d1430096e0c22..578a1eaf975fe 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -754,16 +754,12 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
@g = global i32 0
-; Should not use ccmp if we have to compute the or expression in an integer
-; register anyway because of other users.
define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
; CHECK-LABEL: select_noccmp2:
; CHECK: ; %bb.0:
; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, lt
-; CHECK-NEXT: cmp x0, #13
-; CHECK-NEXT: cset w9, gt
-; CHECK-NEXT: orr w8, w8, w9
+; CHECK-NEXT: ccmp x0, #13, #0, ge
+; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: csel x0, xzr, x3, ne
; CHECK-NEXT: sbfx w8, w8, #0, #1
@@ -799,21 +795,17 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
; CHECK-LABEL: select_noccmp3:
; CHECK: ; %bb.0:
; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w8, lt
-; CHECK-NEXT: cmp w0, #13
-; CHECK-NEXT: cset w9, gt
+; CHECK-NEXT: ccmp w0, #13, #0, ge
+; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: cmp w0, #22
-; CHECK-NEXT: cset w10, lt
-; CHECK-NEXT: cmp w0, #44
-; CHECK-NEXT: cset w11, gt
+; CHECK-NEXT: mov w9, #44
+; CHECK-NEXT: ccmp w0, w9, #0, ge
+; CHECK-NEXT: cset w9, gt
; CHECK-NEXT: cmp w0, #99
-; CHECK-NEXT: cset w12, eq
-; CHECK-NEXT: cmp w0, #77
-; CHECK-NEXT: cset w13, eq
-; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: orr w9, w10, w11
; CHECK-NEXT: and w8, w8, w9
-; CHECK-NEXT: orr w9, w12, w13
+; CHECK-NEXT: mov w9, #77
+; CHECK-NEXT: ccmp w0, w9, #4, ne
+; CHECK-NEXT: cset w9, eq
; CHECK-NEXT: tst w8, w9
; CHECK-NEXT: csel w0, w1, w2, ne
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index 034651afd254a..19ac1f0e53308 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -257,13 +257,12 @@ define dso_local i1 @test_setcc3() {
; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
; CHECK-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w19, eq
+; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-NEXT: bl __unordtf2
; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: cset w8, ne
-; CHECK-NEXT: orr w0, w8, w19
+; CHECK-NEXT: ccmp w19, #0, #4, eq
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
new file mode 100644
index 0000000000000..a86ff042bf0a9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+; Ensure chains of comparisons produce chains of `ccmp`
+
+; (x0 < x1) && (x2 > x3)
+define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
+; CHECK-LABEL: cmp_and2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: ccmp w2, w3, #0, lo
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %5 = icmp ult i32 %0, %1
+ %6 = icmp ugt i32 %2, %3
+ %7 = select i1 %5, i1 %6, i1 false
+ %8 = zext i1 %7 to i32
+ ret i32 %8
+}
+
+; (x0 < x1) && (x2 > x3) && (x4 != x5)
+define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
+; CHECK-LABEL: cmp_and3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: ccmp w2, w3, #0, lo
+; CHECK-NEXT: ccmp w4, w5, #4, hi
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %7 = icmp ult i32 %0, %1
+ %8 = icmp ugt i32 %2, %3
+ %9 = select i1 %7, i1 %8, i1 false
+ %10 = icmp ne i32 %4, %5
+ %11 = select i1 %9, i1 %10, i1 false
+ %12 = zext i1 %11 to i32
+ ret i32 %12
+}
+
+; (x0 < x1) && (x2 > x3) && (x4 != x5) && (x6 == x7)
+define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
+; CHECK-LABEL: cmp_and4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w2, w3
+; CHECK-NEXT: ccmp w0, w1, #2, hi
+; CHECK-NEXT: ccmp w4, w5, #4, lo
+; CHECK-NEXT: ccmp w6, w7, #0, ne
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %9 = icmp ugt i32 %2, %3
+ %10 = icmp ult i32 %0, %1
+ %11 = select i1 %9, i1 %10, i1 false
+ %12 = icmp ne i32 %4, %5
+ %13 = select i1 %11, i1 %12, i1 false
+ %14 = icmp eq i32 %6, %7
+ %15 = select i1 %13, i1 %14, i1 false
+ %16 = zext i1 %15 to i32
+ ret i32 %16
+}
+
+; (x0 < x1) || (x2 != x3)
+define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
+; CHECK-LABEL: cmp_or2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: ccmp w2, w3, #0, hs
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %5 = icmp ult i32 %0, %1
+ %6 = icmp ne i32 %2, %3
+ %7 = select i1 %5, i1 true, i1 %6
+ %8 = zext i1 %7 to i32
+ ret i32 %8
+}
+
+; (x0 < x1) || (x2 > x3) || (x4 != x5)
+define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
+; CHECK-LABEL: cmp_or3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: ccmp w2, w3, #2, hs
+; CHECK-NEXT: ccmp w4, w5, #0, ls
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %7 = icmp ult i32 %0, %1
+ %8 = icmp ugt i32 %2, %3
+ %9 = select i1 %7, i1 true, i1 %8
+ %10 = icmp ne i32 %4, %5
+ %11 = select i1 %9, i1 true, i1 %10
+ %12 = zext i1 %11 to i32
+ ret i32 %12
+}
+
+; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7)
+define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
+; CHECK-LABEL: cmp_or4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: ccmp w2, w3, #2, hs
+; CHECK-NEXT: ccmp w4, w5, #0, ls
+; CHECK-NEXT: ccmp w6, w7, #4, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %9 = icmp ult i32 %0, %1
+ %10 = icmp ugt i32 %2, %3
+ %11 = select i1 %9, i1 true, i1 %10
+ %12 = icmp ne i32 %4, %5
+ %13 = select i1 %11, i1 true, i1 %12
+ %14 = icmp eq i32 %6, %7
+ %15 = select i1 %13, i1 true, i1 %14
+ %16 = zext i1 %15 to i32
+ ret i32 %16
+}
+
+; (x0 != 0) || (x1 != 0)
+define i32 @true_or2(i32 %0, i32 %1) {
+; CHECK-LABEL: true_or2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %3 = icmp ne i32 %0, 0
+ %4 = icmp ne i32 %1, 0
+ %5 = select i1 %3, i1 true, i1 %4
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; (x0 != 0) || (x1 != 0) || (x2 != 0)
+define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
+; CHECK-LABEL: true_or3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: orr w8, w8, w2
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %4 = icmp ne i32 %0, 0
+ %5 = icmp ne i32 %1, 0
+ %6 = select i1 %4, i1 true, i1 %5
+ %7 = icmp ne i32 %2, 0
+ %8 = select i1 %6, i1 true, i1 %7
+ %9 = zext i1 %8 to i32
+ ret i32 %9
+}
diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
index 20620c5f70863..84b6818eaa739 100644
--- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll
+++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
@@ -18,10 +18,8 @@ define i1 @or(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: or:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: cmp w2, w3
-; CHECK-NEXT: cset w9, gt
-; CHECK-NEXT: orr w0, w8, w9
+; CHECK-NEXT: ccmp w2, w3, #0, ne
+; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
%a = icmp eq i32 %x, %y
%b = icmp sgt i32 %z, %w
@@ -46,10 +44,8 @@ define i1 @or_not(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: or_not:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: cset w8, ne
-; CHECK-NEXT: cmp w2, w3
-; CHECK-NEXT: cset w9, gt
-; CHECK-NEXT: orr w0, w8, w9
+; CHECK-NEXT: ccmp w2, w3, #0, eq
+; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
%a = icmp eq i32 %x, %y
%b = icmp sgt i32 %z, %w
diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
index f49897b9f8c87..65a13fb9cbb46 100644
--- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
@@ -4,24 +4,21 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; AARCH-LABEL: muloti_test:
; AARCH: // %bb.0: // %start
-; AARCH-NEXT: umulh x8, x1, x2
-; AARCH-NEXT: mul x9, x3, x0
-; AARCH-NEXT: cmp xzr, x8
-; AARCH-NEXT: umulh x10, x3, x0
-; AARCH-NEXT: cset w8, ne
+; AARCH-NEXT: mul x8, x3, x0
+; AARCH-NEXT: umulh x9, x0, x2
+; AARCH-NEXT: madd x8, x1, x2, x8
+; AARCH-NEXT: umulh x10, x1, x2
+; AARCH-NEXT: adds x8, x9, x8
+; AARCH-NEXT: cset w9, hs
; AARCH-NEXT: cmp x1, #0
; AARCH-NEXT: ccmp x3, #0, #4, ne
-; AARCH-NEXT: madd x9, x1, x2, x9
-; AARCH-NEXT: cset w11, ne
-; AARCH-NEXT: cmp xzr, x10
-; AARCH-NEXT: umulh x10, x0, x2
-; AARCH-NEXT: orr w8, w11, w8
-; AARCH-NEXT: cset w11, ne
+; AARCH-NEXT: mov x1, x8
+; AARCH-NEXT: ccmp xzr, x10, #0, eq
+; AARCH-NEXT: umulh x10, x3, x0
; AARCH-NEXT: mul x0, x0, x2
-; AARCH-NEXT: adds x1, x10, x9
-; AARCH-NEXT: orr w8, w8, w11
-; AARCH-NEXT: cset w9, hs
-; AARCH-NEXT: orr w2, w8, w9
+; AARCH-NEXT: ccmp xzr, x10, #0, eq
+; AARCH-NEXT: cset w10, ne
+; AARCH-NEXT: orr w2, w10, w9
; AARCH-NEXT: ret
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll
index 602e4c727ef12..2bb7f7fcdd3e2 100644
--- a/llvm/test/CodeGen/AArch64/vec_umulo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll
@@ -322,48 +322,40 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
; CHECK-LABEL: umulo_v2i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: umulh x8, x3, x6
-; CHECK-NEXT: mul x10, x7, x2
-; CHECK-NEXT: cmp xzr, x8
-; CHECK-NEXT: umulh x8, x7, x2
-; CHECK-NEXT: cset w9, ne
+; CHECK-NEXT: mul x8, x7, x2
+; CHECK-NEXT: umulh x9, x2, x6
+; CHECK-NEXT: madd x8, x3, x6, x8
+; CHECK-NEXT: umulh x10, x3, x6
+; CHECK-NEXT: adds x8, x9, x8
+; CHECK-NEXT: umulh x11, x7, x2
+; CHECK-NEXT: cset w9, hs
; CHECK-NEXT: cmp x3, #0
; CHECK-NEXT: ccmp x7, #0, #4, ne
-; CHECK-NEXT: umulh x11, x2, x6
-; CHECK-NEXT: madd x10, x3, x6, x10
-; CHECK-NEXT: umulh x12, x1, x4
-; CHECK-NEXT: cset w13, ne
-; CHECK-NEXT: cmp xzr, x8
-; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: umulh x13, x1, x4
+; CHECK-NEXT: ccmp xzr, x10, #0, eq
+; CHECK-NEXT: mul x10, x5, x0
+; CHECK-NEXT: madd x10, x1, x4, x10
+; CHECK-NEXT: ccmp xzr, x11, #0, eq
+; CHECK-NEXT: umulh x11, x0, x4
+; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: adds x10, x11, x10
; CHECK-NEXT: cset w11, hs
-; CHECK-NEXT: cmp xzr, x12
-; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: ccmp x5, #0, #4, ne
-; CHECK-NEXT: mul x15, x5, x0
-; CHECK-NEXT: umulh x14, x5, x0
-; CHECK-NEXT: orr w9, w13, w9
-; CHECK-NEXT: umulh x16, x0, x4
-; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: madd x15, x1, x4, x15
-; CHECK-NEXT: cset w17, ne
-; CHECK-NEXT: cmp xzr, x14
-; CHECK-NEXT: orr w12, w17, w12
-; CHECK-NEXT: cset w14, ne
-; CHECK-NEXT: adds x15, x16, x15
-; CHECK-NEXT: orr w12, w12, w14
-; CHECK-NEXT: cset w14, hs
-; CHECK-NEXT: orr w12, w12, w14
-; CHECK-NEXT: orr w8, w8, w11
-; CHECK-NEXT: mul x11, x0, x4
-; CHECK-NEXT: ldr x9, [sp]
-; CHECK-NEXT: fmov s0, w12
-; CHECK-NEXT: stp x11, x15, [x9]
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: mul x8, x2, x6
+; CHECK-NEXT: orr w9, w12, w9
+; CHECK-NEXT: mul x12, x0, x4
+; CHECK-NEXT: ccmp xzr, x13, #0, eq
+; CHECK-NEXT: umulh x13, x5, x0
+; CHECK-NEXT: ccmp xzr, x13, #0, eq
+; CHECK-NEXT: cset w13, ne
+; CHECK-NEXT: orr w11, w13, w11
+; CHECK-NEXT: fmov s0, w11
+; CHECK-NEXT: ldr x11, [sp]
+; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: mul x9, x2, x6
+; CHECK-NEXT: stp x12, x10, [x11]
; CHECK-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-NEXT: stp x8, x10, [x9, #16]
+; CHECK-NEXT: stp x9, x8, [x11, #16]
; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-NEXT: ret
%t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)