[llvm] 1c06817 - Revert "[AArch64] Optimize memcmp when the result is tested for [in]equality with 0"
Paul Kirth via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 28 16:25:12 PDT 2022
Author: Paul Kirth
Date: 2022-10-28T23:18:21Z
New Revision: 1c0681757669880bda144aeb56dcad6901a2016b
URL: https://github.com/llvm/llvm-project/commit/1c0681757669880bda144aeb56dcad6901a2016b
DIFF: https://github.com/llvm/llvm-project/commit/1c0681757669880bda144aeb56dcad6901a2016b.diff
LOG: Revert "[AArch64] Optimize memcmp when the result is tested for [in]equality with 0"
This reverts commit 01ff511593d1a4920fa3c1d450ad2077661e0bdc.
It triggers an assertion failure in SelectionDAG.cpp;
see https://github.com/llvm/llvm-project/issues/58675 for details.
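For context, the reverted combine changed how [in]equality tests of wide
integer compares are lowered on AArch64. A minimal sketch of an affected
pattern, adapted from the i128-cmp.ll test updated below (the function
name is illustrative):

  define i1 @cmp_i128_eq(i128 %a, i128 %b) {
    %cmp = icmp eq i128 %a, %b
    ret i1 %cmp
  }

  ; With the reverted combine, this lowered to a cmp/ccmp conjunction:
  ;   cmp  x0, x2
  ;   ccmp x1, x3, #0, eq
  ;   cset w0, eq
  ; After the revert, codegen returns to the eor/orr/cmp sequence:
  ;   eor x8, x1, x3
  ;   eor x9, x0, x2
  ;   orr x8, x9, x8
  ;   cmp x8, #0
  ;   cset w0, eq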
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
llvm/test/CodeGen/AArch64/bcmp.ll
llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
llvm/test/CodeGen/AArch64/i128-cmp.ll
llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index abf88b46f98ea..3194f54aab702 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19490,35 +19490,6 @@ static SDValue performSETCCCombine(SDNode *N,
}
}
- // Try to express conjunction "cmp 0 (or (xor A0 A1) (xor B0 B1))" as:
- // cmp A0, A0; ccmp A0, B1, 0, eq; cmp inv(Cond) flag
- if (!DCI.isBeforeLegalize() && VT.isScalarInteger() &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
- LHS->getOpcode() == ISD::OR &&
- (LHS.getOperand(0)->getOpcode() == ISD::XOR &&
- LHS.getOperand(1)->getOpcode() == ISD::XOR) &&
- LHS.hasOneUse() && LHS.getOperand(0)->hasOneUse() &&
- LHS.getOperand(1)->hasOneUse()) {
- SDValue XOR0 = LHS.getOperand(0);
- SDValue XOR1 = LHS.getOperand(1);
- SDValue CCVal = DAG.getConstant(AArch64CC::EQ, DL, MVT_CC);
- EVT TstVT = LHS->getValueType(0);
- SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(TstVT, MVT::Glue),
- XOR0.getOperand(0), XOR0.getOperand(1));
- SDValue Overflow = Cmp.getValue(1);
- SDValue NZCVOp = DAG.getConstant(0, DL, MVT::i32);
- SDValue CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, XOR1.getOperand(0),
- XOR1.getOperand(1), NZCVOp, CCVal, Overflow);
- // Invert CSEL's operands.
- SDValue TVal = DAG.getConstant(1, DL, VT);
- SDValue FVal = DAG.getConstant(0, DL, VT);
- AArch64CC::CondCode CC = changeIntCCToAArch64CC(Cond);
- AArch64CC::CondCode InvCC = AArch64CC::getInvertedCondCode(CC);
- return DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal,
- DAG.getConstant(InvCC, DL, MVT::i32), CCmp);
- }
-
return SDValue();
}
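The block removed above matched the DAG pattern
setcc (or (xor A0, A1), (xor B0, B1)), 0, eq/ne, which is the shape
memcmp/bcmp equality tests take after their loads are widened, and
rewrote it as a SUBS/CCMP conjunction. A rough IR-level sketch of the
matched shape (value names are illustrative):

  define i1 @conjunction_eq(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
    %xa = xor i64 %a0, %a1     ; A0 ^ A1
    %xb = xor i64 %b0, %b1     ; B0 ^ B1
    %or = or i64 %xa, %xb      ; nonzero iff either pair differs
    %cmp = icmp eq i64 %or, 0  ; i.e. (A0 == A1) && (B0 == B1)
    ret i1 %cmp
  }

Note that each xor and the or must have a single use for the combine to
fire, per the hasOneUse() checks in the removed code.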
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
index ec5f8e2524994..d16c8aaff1899 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
@@ -216,40 +216,38 @@ define i128 @test_rmw_add_128(i128* %dst) {
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB4_2 Depth 2
; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload
-; NOLSE-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload
-; NOLSE-NEXT: ldr x10, [sp, #24] // 8-byte Folded Reload
-; NOLSE-NEXT: adds x14, x13, #1
+; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
+; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload
+; NOLSE-NEXT: adds x14, x8, #1
; NOLSE-NEXT: cinc x15, x11, hs
; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB4_1 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT: ldaxp x12, x8, [x10]
-; NOLSE-NEXT: cmp x12, x13
-; NOLSE-NEXT: cset w9, ne
-; NOLSE-NEXT: cmp x8, x11
-; NOLSE-NEXT: cinc w9, w9, ne
-; NOLSE-NEXT: cbnz w9, .LBB4_4
+; NOLSE-NEXT: ldaxp x10, x9, [x13]
+; NOLSE-NEXT: cmp x10, x8
+; NOLSE-NEXT: cset w12, ne
+; NOLSE-NEXT: cmp x9, x11
+; NOLSE-NEXT: cinc w12, w12, ne
+; NOLSE-NEXT: cbnz w12, .LBB4_4
; NOLSE-NEXT: // %bb.3: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2
-; NOLSE-NEXT: stlxp w9, x14, x15, [x10]
-; NOLSE-NEXT: cbnz w9, .LBB4_2
+; NOLSE-NEXT: stlxp w12, x14, x15, [x13]
+; NOLSE-NEXT: cbnz w12, .LBB4_2
; NOLSE-NEXT: b .LBB4_5
; NOLSE-NEXT: .LBB4_4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2
-; NOLSE-NEXT: stlxp w9, x12, x8, [x10]
-; NOLSE-NEXT: cbnz w9, .LBB4_2
+; NOLSE-NEXT: stlxp w12, x10, x9, [x13]
+; NOLSE-NEXT: cbnz w12, .LBB4_2
; NOLSE-NEXT: .LBB4_5: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1
-; NOLSE-NEXT: mov x9, x8
+; NOLSE-NEXT: eor x11, x9, x11
+; NOLSE-NEXT: eor x8, x10, x8
+; NOLSE-NEXT: orr x8, x8, x11
; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill
-; NOLSE-NEXT: mov x10, x12
; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill
-; NOLSE-NEXT: subs x12, x12, x13
-; NOLSE-NEXT: ccmp x8, x11, #0, eq
-; NOLSE-NEXT: cset w8, ne
; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
-; NOLSE-NEXT: tbnz w8, #0, .LBB4_1
+; NOLSE-NEXT: cbnz x8, .LBB4_1
; NOLSE-NEXT: b .LBB4_6
; NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload
@@ -269,26 +267,26 @@ define i128 @test_rmw_add_128(i128* %dst) {
; LSE-NEXT: b .LBB4_1
; LSE-NEXT: .LBB4_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: ldr x8, [sp, #40] // 8-byte Folded Reload
-; LSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload
+; LSE-NEXT: ldr x10, [sp, #40] // 8-byte Folded Reload
+; LSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload
-; LSE-NEXT: mov x0, x11
-; LSE-NEXT: mov x1, x8
-; LSE-NEXT: adds x2, x11, #1
-; LSE-NEXT: cinc x10, x8, hs
+; LSE-NEXT: mov x0, x8
+; LSE-NEXT: mov x1, x10
+; LSE-NEXT: adds x2, x8, #1
+; LSE-NEXT: cinc x11, x10, hs
; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3
-; LSE-NEXT: mov x3, x10
+; LSE-NEXT: mov x3, x11
; LSE-NEXT: caspal x0, x1, x2, x3, [x9]
; LSE-NEXT: mov x9, x1
; LSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill
+; LSE-NEXT: eor x11, x9, x10
; LSE-NEXT: mov x10, x0
; LSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill
-; LSE-NEXT: subs x11, x10, x11
-; LSE-NEXT: ccmp x9, x8, #0, eq
-; LSE-NEXT: cset w8, ne
+; LSE-NEXT: eor x8, x10, x8
+; LSE-NEXT: orr x8, x8, x11
; LSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
-; LSE-NEXT: tbnz w8, #0, .LBB4_1
+; LSE-NEXT: cbnz x8, .LBB4_1
; LSE-NEXT: b .LBB4_2
; LSE-NEXT: .LBB4_2: // %atomicrmw.end
; LSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload
@@ -608,44 +606,42 @@ define i128 @test_rmw_nand_128(i128* %dst) {
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB9_2 Depth 2
; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload
-; NOLSE-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload
-; NOLSE-NEXT: ldr x10, [sp, #24] // 8-byte Folded Reload
-; NOLSE-NEXT: mov w8, w13
-; NOLSE-NEXT: mvn w9, w8
-; NOLSE-NEXT: // implicit-def: $x8
-; NOLSE-NEXT: mov w8, w9
-; NOLSE-NEXT: orr x14, x8, #0xfffffffffffffffe
+; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
+; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload
+; NOLSE-NEXT: mov w9, w8
+; NOLSE-NEXT: mvn w10, w9
+; NOLSE-NEXT: // implicit-def: $x9
+; NOLSE-NEXT: mov w9, w10
+; NOLSE-NEXT: orr x14, x9, #0xfffffffffffffffe
; NOLSE-NEXT: mov x15, #-1
; NOLSE-NEXT: .LBB9_2: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB9_1 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT: ldaxp x12, x8, [x10]
-; NOLSE-NEXT: cmp x12, x13
-; NOLSE-NEXT: cset w9, ne
-; NOLSE-NEXT: cmp x8, x11
-; NOLSE-NEXT: cinc w9, w9, ne
-; NOLSE-NEXT: cbnz w9, .LBB9_4
+; NOLSE-NEXT: ldaxp x10, x9, [x13]
+; NOLSE-NEXT: cmp x10, x8
+; NOLSE-NEXT: cset w12, ne
+; NOLSE-NEXT: cmp x9, x11
+; NOLSE-NEXT: cinc w12, w12, ne
+; NOLSE-NEXT: cbnz w12, .LBB9_4
; NOLSE-NEXT: // %bb.3: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2
-; NOLSE-NEXT: stlxp w9, x14, x15, [x10]
-; NOLSE-NEXT: cbnz w9, .LBB9_2
+; NOLSE-NEXT: stlxp w12, x14, x15, [x13]
+; NOLSE-NEXT: cbnz w12, .LBB9_2
; NOLSE-NEXT: b .LBB9_5
; NOLSE-NEXT: .LBB9_4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2
-; NOLSE-NEXT: stlxp w9, x12, x8, [x10]
-; NOLSE-NEXT: cbnz w9, .LBB9_2
+; NOLSE-NEXT: stlxp w12, x10, x9, [x13]
+; NOLSE-NEXT: cbnz w12, .LBB9_2
; NOLSE-NEXT: .LBB9_5: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1
-; NOLSE-NEXT: mov x9, x8
+; NOLSE-NEXT: eor x11, x9, x11
+; NOLSE-NEXT: eor x8, x10, x8
+; NOLSE-NEXT: orr x8, x8, x11
; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill
-; NOLSE-NEXT: mov x10, x12
; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill
-; NOLSE-NEXT: subs x12, x12, x13
-; NOLSE-NEXT: ccmp x8, x11, #0, eq
-; NOLSE-NEXT: cset w8, ne
; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
-; NOLSE-NEXT: tbnz w8, #0, .LBB9_1
+; NOLSE-NEXT: cbnz x8, .LBB9_1
; NOLSE-NEXT: b .LBB9_6
; NOLSE-NEXT: .LBB9_6: // %atomicrmw.end
; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload
@@ -665,30 +661,30 @@ define i128 @test_rmw_nand_128(i128* %dst) {
; LSE-NEXT: b .LBB9_1
; LSE-NEXT: .LBB9_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: ldr x8, [sp, #40] // 8-byte Folded Reload
-; LSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload
+; LSE-NEXT: ldr x10, [sp, #40] // 8-byte Folded Reload
+; LSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload
-; LSE-NEXT: mov x0, x11
-; LSE-NEXT: mov x1, x8
-; LSE-NEXT: mov w10, w11
-; LSE-NEXT: mvn w12, w10
-; LSE-NEXT: // implicit-def: $x10
-; LSE-NEXT: mov w10, w12
-; LSE-NEXT: orr x2, x10, #0xfffffffffffffffe
-; LSE-NEXT: mov x10, #-1
+; LSE-NEXT: mov x0, x8
+; LSE-NEXT: mov x1, x10
+; LSE-NEXT: mov w11, w8
+; LSE-NEXT: mvn w12, w11
+; LSE-NEXT: // implicit-def: $x11
+; LSE-NEXT: mov w11, w12
+; LSE-NEXT: orr x2, x11, #0xfffffffffffffffe
+; LSE-NEXT: mov x11, #-1
; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3
-; LSE-NEXT: mov x3, x10
+; LSE-NEXT: mov x3, x11
; LSE-NEXT: caspal x0, x1, x2, x3, [x9]
; LSE-NEXT: mov x9, x1
; LSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill
+; LSE-NEXT: eor x11, x9, x10
; LSE-NEXT: mov x10, x0
; LSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill
-; LSE-NEXT: subs x11, x10, x11
-; LSE-NEXT: ccmp x9, x8, #0, eq
-; LSE-NEXT: cset w8, ne
+; LSE-NEXT: eor x8, x10, x8
+; LSE-NEXT: orr x8, x8, x11
; LSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
-; LSE-NEXT: tbnz w8, #0, .LBB9_1
+; LSE-NEXT: cbnz x8, .LBB9_1
; LSE-NEXT: b .LBB9_2
; LSE-NEXT: .LBB9_2: // %atomicrmw.end
; LSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
index 60b0c37cb8535..8a2429b064adc 100644
--- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
@@ -12,8 +12,10 @@ define i1 @test_b2(i8* %s1, i8* %s2) {
; CHECKN-NEXT: ldr x9, [x1]
; CHECKN-NEXT: ldur x10, [x0, #7]
; CHECKN-NEXT: ldur x11, [x1, #7]
-; CHECKN-NEXT: cmp x8, x9
-; CHECKN-NEXT: ccmp x10, x11, #0, eq
+; CHECKN-NEXT: eor x8, x8, x9
+; CHECKN-NEXT: eor x9, x10, x11
+; CHECKN-NEXT: orr x8, x8, x9
+; CHECKN-NEXT: cmp x8, #0
; CHECKN-NEXT: cset w0, eq
; CHECKN-NEXT: ret
;
@@ -42,8 +44,10 @@ define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
; CHECKN-NEXT: ldr x9, [x1]
; CHECKN-NEXT: ldur x10, [x0, #7]
; CHECKN-NEXT: ldur x11, [x1, #7]
-; CHECKN-NEXT: cmp x8, x9
-; CHECKN-NEXT: ccmp x10, x11, #0, eq
+; CHECKN-NEXT: eor x8, x8, x9
+; CHECKN-NEXT: eor x9, x10, x11
+; CHECKN-NEXT: orr x8, x8, x9
+; CHECKN-NEXT: cmp x8, #0
; CHECKN-NEXT: cset w0, eq
; CHECKN-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/bcmp.ll b/llvm/test/CodeGen/AArch64/bcmp.ll
index 510c64ee1111a..ff94642857e63 100644
--- a/llvm/test/CodeGen/AArch64/bcmp.ll
+++ b/llvm/test/CodeGen/AArch64/bcmp.ll
@@ -113,8 +113,10 @@ define i1 @bcmp7(ptr %a, ptr %b) {
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: ldur w10, [x0, #3]
; CHECK-NEXT: ldur w11, [x1, #3]
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: ccmp w10, w11, #0, eq
+; CHECK-NEXT: eor w8, w8, w9
+; CHECK-NEXT: eor w9, w10, w11
+; CHECK-NEXT: orr w8, w8, w9
+; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%cr = call i32 @bcmp(ptr %a, ptr %b, i64 7)
@@ -180,8 +182,10 @@ define i1 @bcmp11(ptr %a, ptr %b) {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: ldur x10, [x0, #3]
; CHECK-NEXT: ldur x11, [x1, #3]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: eor x8, x8, x9
+; CHECK-NEXT: eor x9, x10, x11
+; CHECK-NEXT: orr x8, x8, x9
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%cr = call i32 @bcmp(ptr %a, ptr %b, i64 11)
@@ -214,8 +218,10 @@ define i1 @bcmp13(ptr %a, ptr %b) {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: ldur x10, [x0, #5]
; CHECK-NEXT: ldur x11, [x1, #5]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: eor x8, x8, x9
+; CHECK-NEXT: eor x9, x10, x11
+; CHECK-NEXT: orr x8, x8, x9
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%cr = call i32 @bcmp(ptr %a, ptr %b, i64 13)
@@ -230,8 +236,10 @@ define i1 @bcmp14(ptr %a, ptr %b) {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: ldur x10, [x0, #6]
; CHECK-NEXT: ldur x11, [x1, #6]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: eor x8, x8, x9
+; CHECK-NEXT: eor x9, x10, x11
+; CHECK-NEXT: orr x8, x8, x9
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%cr = call i32 @bcmp(ptr %a, ptr %b, i64 14)
@@ -246,8 +254,10 @@ define i1 @bcmp15(ptr %a, ptr %b) {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: ldur x10, [x0, #7]
; CHECK-NEXT: ldur x11, [x1, #7]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: eor x8, x8, x9
+; CHECK-NEXT: eor x9, x10, x11
+; CHECK-NEXT: orr x8, x8, x9
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%cr = call i32 @bcmp(ptr %a, ptr %b, i64 15)
@@ -260,8 +270,10 @@ define i1 @bcmp16(ptr %a, ptr %b) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp x8, x9, [x0]
; CHECK-NEXT: ldp x10, x11, [x1]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ccmp x9, x11, #0, eq
+; CHECK-NEXT: eor x8, x8, x10
+; CHECK-NEXT: eor x9, x9, x11
+; CHECK-NEXT: orr x8, x8, x9
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%cr = call i32 @bcmp(ptr %a, ptr %b, i64 16)
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
index 885d4a98d77af..f826a80940468 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -128,82 +128,3 @@ define i1 @combine_setcc_ne_vecreduce_or_v64i1(<64 x i8> %a) {
%cmp2 = icmp ne i64 %cast, zeroinitializer
ret i1 %cmp2
}
-
-define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) {
-; CHECK-LABEL: combine_setcc_eq0_conjunction_xor_or:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ccmp x9, x11, #0, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
- %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16)
- %cmp = icmp eq i32 %bcmp, 0
- ret i1 %cmp
-}
-
-define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) {
-; CHECK-LABEL: combine_setcc_ne0_conjunction_xor_or:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ccmp x9, x11, #0, eq
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: ret
- %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16)
- %cmp = icmp ne i32 %bcmp, 0
- ret i1 %cmp
-}
-
-; Doesn't increase the number of instructions, where the LHS has multiple uses
-define i32 @combine_setcc_multiuse(i32 %0, i32 %1, i32 %2, i32 %3) {
-; CHECK-LABEL: combine_setcc_multiuse:
-; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w1, w0
-; CHECK-NEXT: eor w9, w3, w2
-; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: cbz w8, .LBB10_2
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: mov w0, w8
-; CHECK-NEXT: b use
-; CHECK-NEXT: .LBB10_2:
-; CHECK-NEXT: ret
- %5 = xor i32 %1, %0
- %6 = xor i32 %3, %2
- %7 = or i32 %6, %5
- %8 = icmp eq i32 %7, 0
- br i1 %8, label %11, label %9
-
-9: ; preds = %4
- %10 = tail call i32 @use(i32 %7) #2
- br label %11
-
-11: ; preds = %4, %9
- %12 = phi i32 [ %10, %9 ], [ %0, %4 ]
- ret i32 %12
-}
-
-; There may be issues with the CMP/CCMP with the scheduling of instructions
-; that ISel will create out of the DAG
-define i32 @combine_setcc_glue(i128 noundef %x, i128 noundef %y) {
-; CHECK-LABEL: combine_setcc_glue:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: ccmp x1, x3, #0, eq
-; CHECK-NEXT: ccmp x0, x2, #4, ne
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
-entry:
- %cmp3 = icmp eq i128 %x, %y
- %conv = trunc i128 %x to i64
- %conv1 = trunc i128 %y to i64
- %cmp = icmp eq i64 %conv, %conv1
- %or7 = or i1 %cmp3, %cmp
- %or = zext i1 %or7 to i32
- ret i32 %or
-}
-
-declare i32 @bcmp(ptr nocapture, ptr nocapture, i64)
-declare i32 @use(i32 noundef)
diff --git a/llvm/test/CodeGen/AArch64/i128-cmp.ll b/llvm/test/CodeGen/AArch64/i128-cmp.ll
index b50a559434302..7cc3e843ba247 100644
--- a/llvm/test/CodeGen/AArch64/i128-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/i128-cmp.ll
@@ -6,8 +6,10 @@ declare void @call()
define i1 @cmp_i128_eq(i128 %a, i128 %b) {
; CHECK-LABEL: cmp_i128_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: ccmp x1, x3, #0, eq
+; CHECK-NEXT: eor x8, x1, x3
+; CHECK-NEXT: eor x9, x0, x2
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%cmp = icmp eq i128 %a, %b
@@ -17,8 +19,10 @@ define i1 @cmp_i128_eq(i128 %a, i128 %b) {
define i1 @cmp_i128_ne(i128 %a, i128 %b) {
; CHECK-LABEL: cmp_i128_ne:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: ccmp x1, x3, #0, eq
+; CHECK-NEXT: eor x8, x1, x3
+; CHECK-NEXT: eor x9, x0, x2
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%cmp = icmp ne i128 %a, %b
@@ -116,9 +120,10 @@ define i1 @cmp_i128_sle(i128 %a, i128 %b) {
define void @br_on_cmp_i128_eq(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: br_on_cmp_i128_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: ccmp x1, x3, #0, eq
-; CHECK-NEXT: b.ne .LBB10_2
+; CHECK-NEXT: eor x8, x1, x3
+; CHECK-NEXT: eor x9, x0, x2
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cbnz x8, .LBB10_2
; CHECK-NEXT: // %bb.1: // %call
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: bl call
@@ -137,9 +142,10 @@ exit:
define void @br_on_cmp_i128_ne(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: br_on_cmp_i128_ne:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: ccmp x1, x3, #0, eq
-; CHECK-NEXT: b.eq .LBB11_2
+; CHECK-NEXT: eor x8, x1, x3
+; CHECK-NEXT: eor x9, x0, x2
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cbz x8, .LBB11_2
; CHECK-NEXT: // %bb.1: // %call
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: bl call
diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
index e298748e8ec26..e955014371525 100644
--- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
@@ -68,10 +68,12 @@ define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align
; AARCH-NEXT: adds x11, x12, x11
; AARCH-NEXT: adc x12, x13, x14
; AARCH-NEXT: adds x10, x11, x10
-; AARCH-NEXT: asr x11, x1, #63
; AARCH-NEXT: adc x9, x12, x9
-; AARCH-NEXT: cmp x10, x11
-; AARCH-NEXT: ccmp x9, x11, #0, eq
+; AARCH-NEXT: asr x11, x1, #63
+; AARCH-NEXT: eor x9, x9, x11
+; AARCH-NEXT: eor x10, x10, x11
+; AARCH-NEXT: orr x9, x10, x9
+; AARCH-NEXT: cmp x9, #0
; AARCH-NEXT: cset w9, ne
; AARCH-NEXT: tbz x8, #63, .LBB1_2
; AARCH-NEXT: // %bb.1: // %Entry