[llvm] r344439 - [AArch64] Swap comparison operands if that enables some folding.
Arnaud A. de Grandmaison via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 13 00:43:56 PDT 2018
Author: aadg
Date: Sat Oct 13 00:43:56 2018
New Revision: 344439
URL: http://llvm.org/viewvc/llvm-project?rev=344439&view=rev
Log:
[AArch64] Swap comparison operands if that enables some folding.
Summary:
AArch64 can fold some shift+extend operations on the RHS operand of
comparisons, so swap the operands if that makes sense.
This provides a fix for https://bugs.llvm.org/show_bug.cgi?id=38751
Reviewers: efriedma, t.p.northover, javed.absar
Subscribers: mcrosier, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D53067
Added:
llvm/trunk/test/CodeGen/AArch64/swap-compare-operands.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/and-mask-removal.ll
llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll
llvm/trunk/test/CodeGen/AArch64/sat-add.ll
llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll
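Before the diff itself, here is a C++ sketch of the kind of comparison this change targets (the function name is invented for illustration; the expected assembly is the one checked by the new testSwapCmpWithLSL64_1 test added below):

#include <cstdint>

// Compare a shifted value against a plain register. Previously the shift was
// materialized with a separate lsl; with the comparison operands swapped, the
// shift folds into the compare itself:
//   cmp  x1, x0, lsl #1
//   cset w0, gt
bool lessThanShifted(int64_t a, int64_t b) {
  // The cast through uint64_t mirrors the IR's plain 'shl i64' and avoids
  // signed-shift issues in C++.
  return (int64_t)((uint64_t)a << 1) < b;
}
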
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=344439&r1=344438&r2=344439&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Sat Oct 13 00:43:56 2018
@@ -1460,6 +1460,21 @@ static bool isLegalArithImmed(uint64_t C
return IsLegal;
}
+// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
+// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
+// can be set differently by this operation. It comes down to whether
+// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are equal
+// then everything is fine; if not, the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
+//
+// So, finally, the only LLVM-native comparisons that don't mention C and V
+// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
+// the absence of information about op2.
+static bool isCMN(SDValue Op, ISD::CondCode CC) {
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE);
+}
+
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
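As an aside on the comment hoisted into isCMN above, here is a small standalone C++ sketch (not LLVM code; the NZCV flag model is written out by hand, so treat it as illustrative) showing why only SETEQ/SETNE survive the CMP -> CMN rewrite when nothing is known about op2. For op2 == 0 the Z (and N) flags agree between the two forms, but the C flag does not, so any unsigned comparison would be miscompiled:

#include <cstdint>
#include <cstdio>

// NZCV flags of a 32-bit AArch64 SUBS/ADDS, modelled in plain C++.
struct Flags { bool N, Z, C, V; };

static Flags subs(uint32_t a, uint32_t b) { // CMP a, b
  uint32_t r = a - b;
  return { (int32_t)r < 0, r == 0,
           a >= b, // C: no borrow
           ((int32_t)a < 0) != ((int32_t)b < 0) &&
               ((int32_t)r < 0) != ((int32_t)a < 0) }; // V: signed overflow
}

static Flags adds(uint32_t a, uint32_t b) { // CMN a, b
  uint32_t r = a + b;
  return { (int32_t)r < 0, r == 0,
           r < a, // C: carry out
           ((int32_t)a < 0) == ((int32_t)b < 0) &&
               ((int32_t)r < 0) != ((int32_t)a < 0) }; // V: signed overflow
}

int main() {
  uint32_t op1 = 5, op2 = 0;                // the problematic case: op2 == 0
  Flags cmp = subs(op1, (uint32_t)0 - op2); // CMP op1, (sub 0, op2)
  Flags cmn = adds(op1, op2);               // CMN op1, op2
  printf("CMP: N=%d Z=%d C=%d V=%d\n", cmp.N, cmp.Z, cmp.C, cmp.V);
  printf("CMN: N=%d Z=%d C=%d V=%d\n", cmn.N, cmn.Z, cmn.C, cmn.V);
  // Prints C=1 for CMP but C=0 for CMN, while Z (and N) agree: only EQ/NE,
  // which read Z alone, are safe without more information about op2.
  return 0;
}
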
@@ -1482,18 +1497,8 @@ static SDValue emitComparison(SDValue LH
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
- if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
- // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
- // can be set differently by this operation. It comes down to whether
- // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
- // everything is fine. If not then the optimization is wrong. Thus general
- // comparisons are only valid if op2 != 0.
-
- // So, finally, the only LLVM-native comparisons that don't mention C and V
- // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
- // the absence of information about op2.
+ if (isCMN(RHS, CC)) {
+ // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
@@ -1765,6 +1770,42 @@ static SDValue emitConjunctionDisjunctio
/// @}
+/// Returns how profitable it is to fold a comparison's operand's shift and/or
+/// extension operations.
+static unsigned getCmpOperandFoldingProfit(SDValue Op) {
+ auto isSupportedExtend = [&](SDValue V) {
+ if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
+ return true;
+
+ if (V.getOpcode() == ISD::AND)
+ if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ uint64_t Mask = MaskCst->getZExtValue();
+ return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+ }
+
+ return false;
+ };
+
+ if (!Op.hasOneUse())
+ return 0;
+
+ if (isSupportedExtend(Op))
+ return 1;
+
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
+ if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ uint64_t Shift = ShiftCst->getZExtValue();
+ if (isSupportedExtend(Op.getOperand(0)))
+ return (Shift <= 4) ? 2 : 1;
+ EVT VT = Op.getValueType();
+ if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
+ return 1;
+ }
+
+ return 0;
+}
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
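To make the scoring in getCmpOperandFoldingProfit concrete, here is a simplified standalone model (the Expr struct and its fields are invented for illustration; the real function walks SelectionDAG nodes and additionally requires the shift amount to be a compile-time constant):

#include <cstdint>

enum class Kind { SExtInReg, And, Shl, Srl, Sra, Other };

struct Expr {
  Kind kind = Kind::Other;
  uint64_t imm = 0;          // mask for And, amount for shifts
  unsigned bits = 32;        // value type width: 32 or 64
  const Expr *op0 = nullptr; // shifted operand, for shifts
  bool oneUse = true;
};

static bool isSupportedExtend(const Expr &e) {
  if (e.kind == Kind::SExtInReg)
    return true;
  if (e.kind == Kind::And)
    return e.imm == 0xFF || e.imm == 0xFFFF || e.imm == 0xFFFFFFFF;
  return false;
}

// 2 for an extend shifted by at most 4 (folds as "cmp reg, reg, [su]xt? #n"),
// 1 for a lone extend or an in-range plain shift, 0 otherwise.
static unsigned foldingProfit(const Expr &e) {
  if (!e.oneUse)
    return 0;
  if (isSupportedExtend(e))
    return 1;
  if (e.kind == Kind::Shl || e.kind == Kind::Srl || e.kind == Kind::Sra) {
    if (e.op0 && isSupportedExtend(*e.op0))
      return e.imm <= 4 ? 2 : 1;
    if ((e.bits == 32 && e.imm <= 31) || (e.bits == 64 && e.imm <= 63))
      return 1;
  }
  return 0;
}

// Example scores under this model:
//   sext_inreg x, i16      -> 1  (folds as "cmp ..., x, sxth")
//   (x & 0xFF) << 3        -> 2  (folds as "cmp ..., x, uxtb #3")
//   y << 40 (i64)          -> 1  (folds as a plain shifted operand)
//   x + 1                  -> 0  (nothing to fold)
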
@@ -1822,6 +1863,27 @@ static SDValue getAArch64Cmp(SDValue LHS
}
}
}
+
+ // Comparisons are canonicalized so that the RHS operand is simpler than the
+ // LHS one, the extreme case being when RHS is an immediate. However, AArch64
+ // can fold some shift+extend operations on the RHS operand, so swap the
+ // operands if that can be done.
+ //
+ // For example:
+ // lsl w13, w11, #1
+ // cmp w13, w12
+ // can be turned into:
+ // cmp w12, w11, lsl #1
+ if (!isa<ConstantSDNode>(RHS) ||
+ !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
+ SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
+
+ if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
+ std::swap(LHS, RHS);
+ CC = ISD::getSetCCSwappedOperands(CC);
+ }
+ }
+
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
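The call to ISD::getSetCCSwappedOperands is what keeps the swap semantics-preserving: swapping the operands requires swapping the predicate as well (lt <-> gt, lo <-> hi, ls <-> hs, and so on). A trivial standalone check of that invariant on a few sample values (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t samples[] = {-2147483647 - 1, -42, -1, 0, 1, 42, 2147483647};
  for (int32_t a : samples)
    for (int32_t b : samples) {
      // Signed: slt(a, b) == sgt(b, a), i.e. lt <-> gt after the swap.
      assert((a < b) == (b > a));
      // Unsigned: ult(a, b) == ugt(b, a), i.e. lo <-> hi after the swap.
      assert(((uint32_t)a < (uint32_t)b) == ((uint32_t)b > (uint32_t)a));
      // Non-strict forms: ule <-> uge, i.e. ls <-> hs.
      assert(((uint32_t)a <= (uint32_t)b) == ((uint32_t)b >= (uint32_t)a));
    }
  return 0;
}

This flip is visible in the sat-add.ll checks further down, where "lo" becomes "hi" and "ls" becomes "hs" once the uxth operand moves to the RHS of the cmp.
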
Modified: llvm/trunk/test/CodeGen/AArch64/and-mask-removal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/and-mask-removal.ll?rev=344439&r1=344438&r2=344439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/and-mask-removal.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/and-mask-removal.ll Sat Oct 13 00:43:56 2018
@@ -179,7 +179,9 @@ ret_false:
ret_true:
ret i1 true
; CHECK-LABEL: test16_2
-; CHECK: and
+; CHECK: mov [[CST:w[0-9]+]], #16882
+; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]]
+; CHECK: cmp {{.*}}, [[ADD]], uxth
; CHECK: ret
}
@@ -207,7 +209,9 @@ ret_false:
ret_true:
ret i1 true
; CHECK-LABEL: test16_4
-; CHECK: and
+; CHECK: mov [[CST:w[0-9]+]], #29985
+; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]]
+; CHECK: cmp {{.*}}, [[ADD]], uxth
; CHECK: ret
}
@@ -249,7 +253,9 @@ ret_false:
ret_true:
ret i1 true
; CHECK-LABEL: test16_7
-; CHECK: and
+; CHECK: mov [[CST:w[0-9]+]], #9272
+; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]]
+; CHECK: cmp {{.*}}, [[ADD]], uxth
; CHECK: ret
}
Modified: llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll?rev=344439&r1=344438&r2=344439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll Sat Oct 13 00:43:56 2018
@@ -35,8 +35,7 @@ define i1 @shifts_necmp_i16_i8(i16 %x) n
define i1 @shifts_necmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i32 %x, 16 ; 32-16
@@ -48,8 +47,7 @@ define i1 @shifts_necmp_i32_i16(i32 %x)
define i1 @shifts_necmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i32 %x, 24 ; 32-8
@@ -61,8 +59,7 @@ define i1 @shifts_necmp_i32_i8(i32 %x) n
define i1 @shifts_necmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 32 ; 64-32
@@ -74,8 +71,7 @@ define i1 @shifts_necmp_i64_i32(i64 %x)
define i1 @shifts_necmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 48 ; 64-16
@@ -87,8 +83,7 @@ define i1 @shifts_necmp_i64_i16(i64 %x)
define i1 @shifts_necmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 56 ; 64-8
@@ -117,8 +112,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nou
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, -32768 ; ~0U << (16-1)
@@ -129,8 +123,7 @@ define i1 @add_ultcmp_i32_i16(i32 %x) no
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, -128 ; ~0U << (8-1)
@@ -141,8 +134,7 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nou
define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1)
@@ -153,8 +145,7 @@ define i1 @add_ultcmp_i64_i32(i64 %x) no
define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -32768 ; ~0U << (16-1)
@@ -165,8 +156,7 @@ define i1 @add_ultcmp_i64_i16(i64 %x) no
define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -128 ; ~0U << (8-1)
@@ -208,8 +198,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nou
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
@@ -220,8 +209,7 @@ define i1 @add_ugecmp_i32_i16(i32 %x) no
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
@@ -232,8 +220,7 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nou
define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
@@ -244,8 +231,7 @@ define i1 @add_ugecmp_i64_i32(i64 %x) no
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
@@ -256,8 +242,7 @@ define i1 @add_ugecmp_i64_i16(i64 %x) no
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
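The pattern exercised in the tests above is a truncation check: shift left, arithmetic shift right (or add a signed bound), then compare against the original value. In C++ terms it is roughly the sketch below (function name invented for illustration); with this patch the sign extension is folded into the compare as an sxtb/sxth/sxtw operand instead of occupying a scratch register:

#include <cstdint>

// Does x fail to survive a round trip through int16_t? Lowers to a
// sign_extend_inreg plus compare, which now becomes
//   cmp  w0, w0, sxth
//   cset w0, ne
bool doesNotFitInI16(int32_t x) {
  return (int32_t)(int16_t)x != x;
}
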
Modified: llvm/trunk/test/CodeGen/AArch64/sat-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/sat-add.ll?rev=344439&r1=344438&r2=344439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/sat-add.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/sat-add.ll Sat Oct 13 00:43:56 2018
@@ -52,11 +52,10 @@ define i8 @unsigned_sat_constant_i8_usin
define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_min:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: mov w9, #65493
-; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: mov w8, #65493
+; CHECK-NEXT: cmp w8, w0, uxth
; CHECK-NEXT: mov w8, #-43
-; CHECK-NEXT: csel w8, w0, w8, lo
+; CHECK-NEXT: csel w8, w0, w8, hi
; CHECK-NEXT: add w0, w8, #42 // =42
; CHECK-NEXT: ret
%c = icmp ult i16 %x, -43
@@ -82,11 +81,10 @@ define i16 @unsigned_sat_constant_i16_us
define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: mov w10, #65493
-; CHECK-NEXT: add w9, w0, #42 // =42
-; CHECK-NEXT: cmp w8, w10
-; CHECK-NEXT: csinv w0, w9, wzr, ls
+; CHECK-NEXT: mov w9, #65493
+; CHECK-NEXT: add w8, w0, #42 // =42
+; CHECK-NEXT: cmp w9, w0, uxth
+; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
%a = add i16 %x, 42
%c = icmp ugt i16 %x, -43
Modified: llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll?rev=344439&r1=344438&r2=344439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll Sat Oct 13 00:43:56 2018
@@ -35,8 +35,7 @@ define i1 @shifts_eqcmp_i16_i8(i16 %x) n
define i1 @shifts_eqcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i32 %x, 16 ; 32-16
@@ -48,8 +47,7 @@ define i1 @shifts_eqcmp_i32_i16(i32 %x)
define i1 @shifts_eqcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i32 %x, 24 ; 32-8
@@ -61,8 +59,7 @@ define i1 @shifts_eqcmp_i32_i8(i32 %x) n
define i1 @shifts_eqcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 32 ; 64-32
@@ -74,8 +71,7 @@ define i1 @shifts_eqcmp_i64_i32(i64 %x)
define i1 @shifts_eqcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 48 ; 64-16
@@ -87,8 +83,7 @@ define i1 @shifts_eqcmp_i64_i16(i64 %x)
define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 56 ; 64-8
@@ -117,8 +112,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nou
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, -32768 ; ~0U << (16-1)
@@ -129,8 +123,7 @@ define i1 @add_ugecmp_i32_i16(i32 %x) no
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, -128 ; ~0U << (8-1)
@@ -141,8 +134,7 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nou
define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1)
@@ -153,8 +145,7 @@ define i1 @add_ugecmp_i64_i32(i64 %x) no
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -32768 ; ~0U << (16-1)
@@ -165,8 +156,7 @@ define i1 @add_ugecmp_i64_i16(i64 %x) no
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -128 ; ~0U << (8-1)
@@ -208,8 +198,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nou
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
@@ -220,8 +209,7 @@ define i1 @add_ultcmp_i32_i16(i32 %x) no
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
@@ -232,8 +220,7 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nou
define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
@@ -244,8 +231,7 @@ define i1 @add_ultcmp_i64_i32(i64 %x) no
define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
@@ -256,8 +242,7 @@ define i1 @add_ultcmp_i64_i16(i64 %x) no
define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb x8, w0
-; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
Added: llvm/trunk/test/CodeGen/AArch64/swap-compare-operands.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/swap-compare-operands.ll?rev=344439&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/swap-compare-operands.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/swap-compare-operands.ll Sat Oct 13 00:43:56 2018
@@ -0,0 +1,632 @@
+; RUN: llc < %s -mtriple=arm64 | FileCheck %s
+
+define i1 @testSwapCmpWithLSL64_1(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithLSL64_1:
+; CHECK: cmp x1, x0, lsl #1
+; CHECK-NEXT: cset w0, gt
+entry:
+ %shl = shl i64 %a, 1
+ %cmp = icmp slt i64 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithLSL64_63(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithLSL64_63:
+; CHECK: cmp x1, x0, lsl #63
+; CHECK-NEXT: cset w0, gt
+entry:
+ %shl = shl i64 %a, 63
+ %cmp = icmp slt i64 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithLSL32_1(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithLSL32_1:
+; CHECK: cmp w1, w0, lsl #1
+; CHECK-NEXT: cset w0, gt
+entry:
+ %shl = shl i32 %a, 1
+ %cmp = icmp slt i32 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithLSL32_31(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithLSL32_31:
+; CHECK: cmp w1, w0, lsl #31
+; CHECK-NEXT: cset w0, gt
+entry:
+ %shl = shl i32 %a, 31
+ %cmp = icmp slt i32 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithLSR64_1(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithLSR64_1:
+; CHECK: cmp x1, x0, lsr #1
+; CHECK-NEXT: cset w0, gt
+entry:
+ %lshr = lshr i64 %a, 1
+ %cmp = icmp slt i64 %lshr, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithLSR64_63(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithLSR64_63:
+; CHECK: cmp x1, x0, lsr #63
+; CHECK-NEXT: cset w0, gt
+entry:
+ %lshr = lshr i64 %a, 63
+ %cmp = icmp slt i64 %lshr, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithLSR32_1(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithLSR32_1:
+; CHECK: cmp w1, w0, lsr #1
+; CHECK-NEXT: cset w0, gt
+entry:
+ %lshr = lshr i32 %a, 1
+ %cmp = icmp slt i32 %lshr, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithLSR32_31(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithLSR32_31:
+; CHECK: cmp w1, w0, lsr #31
+; CHECK-NEXT: cset w0, gt
+entry:
+ %lshr = lshr i32 %a, 31
+ %cmp = icmp slt i32 %lshr, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithASR64_1(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithASR64_1:
+; CHECK: cmp x1, x0, asr #1
+; CHECK-NEXT: cset w0, gt
+entry:
+ %ashr = ashr i64 %a, 1
+ %cmp = icmp slt i64 %ashr, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithASR64_63(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithASR64_63:
+; CHECK: cmp x1, x0, asr #63
+; CHECK-NEXT: cset w0, gt
+entry:
+ %ashr = ashr i64 %a, 63
+ %cmp = icmp slt i64 %ashr, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithASR32_1(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithASR32_1:
+; CHECK: cmp w1, w0, asr #1
+; CHECK-NEXT: cset w0, gt
+entry:
+ %ashr = ashr i32 %a, 1
+ %cmp = icmp slt i32 %ashr, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithASR32_31(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithASR32_31:
+; CHECK: cmp w1, w0, asr #31
+; CHECK-NEXT: cset w0, gt
+entry:
+ %ashr = ashr i32 %a, 31
+ %cmp = icmp slt i32 %ashr, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedZeroExtend32_64(i32 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedZeroExtend32_64
+; CHECK: cmp x1, w0, uxtw #2
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a64 = zext i32 %a to i64
+ %shl.0 = shl i64 %a64, 2
+ %cmp = icmp ugt i64 %shl.0, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedZeroExtend16_64(i16 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedZeroExtend16_64
+; CHECK: cmp x1, w0, uxth #2
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a64 = zext i16 %a to i64
+ %shl.0 = shl i64 %a64, 2
+ %cmp = icmp ugt i64 %shl.0, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedZeroExtend8_64(i8 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedZeroExtend8_64
+; CHECK: cmp x1, w0, uxtb #4
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a64 = zext i8 %a to i64
+ %shl.2 = shl i64 %a64, 4
+ %cmp = icmp ugt i64 %shl.2, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedZeroExtend16_32(i16 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedZeroExtend16_32
+; CHECK: cmp w1, w0, uxth #3
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a32 = zext i16 %a to i32
+ %shl = shl i32 %a32, 3
+ %cmp = icmp ugt i32 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedZeroExtend8_32(i8 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedZeroExtend8_32
+; CHECK: cmp w1, w0, uxtb #4
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a32 = zext i8 %a to i32
+ %shl = shl i32 %a32, 4
+ %cmp = icmp ugt i32 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithTooLargeShiftedZeroExtend8_32(i8 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithTooLargeShiftedZeroExtend8_32
+; CHECK: and [[REG:w[0-9]+]], w0, #0xff
+; CHECK: cmp w1, [[REG]], lsl #5
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a32 = zext i8 %a to i32
+ %shl = shl i32 %a32, 5
+ %cmp = icmp ugt i32 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithZeroExtend8_32(i8 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithZeroExtend8_32
+; CHECK: cmp w1, w0, uxtb
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a32 = zext i8 %a to i32
+ %cmp = icmp ugt i32 %a32, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedSignExtend32_64(i32 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedSignExtend32_64
+; CHECK: cmp x1, w0, sxtw #2
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a64 = sext i32 %a to i64
+ %shl.0 = shl i64 %a64, 2
+ %cmp = icmp ugt i64 %shl.0, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedSignExtend16_64(i16 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedSignExtend16_64
+; CHECK: cmp x1, w0, sxth #2
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a64 = sext i16 %a to i64
+ %shl.0 = shl i64 %a64, 2
+ %cmp = icmp ugt i64 %shl.0, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedSignExtend8_64(i8 %a, i64 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedSignExtend8_64
+; CHECK: cmp x1, w0, sxtb #4
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a64 = sext i8 %a to i64
+ %shl.2 = shl i64 %a64, 4
+ %cmp = icmp ugt i64 %shl.2, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedSignExtend16_32(i16 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedSignExtend16_32
+; CHECK: cmp w1, w0, sxth #3
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a32 = sext i16 %a to i32
+ %shl = shl i32 %a32, 3
+ %cmp = icmp ugt i32 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithShiftedSignExtend8_32(i8 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithShiftedSignExtend8_32
+; CHECK: cmp w1, w0, sxtb #4
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a32 = sext i8 %a to i32
+ %shl = shl i32 %a32, 4
+ %cmp = icmp ugt i32 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithTooLargeShiftedSignExtend8_32(i8 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithTooLargeShiftedSignExtend8_32
+; CHECK: sxtb [[REG:w[0-9]+]], w0
+; CHECK-NEXT: cmp w1, [[REG]], lsl #5
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a32 = sext i8 %a to i32
+ %shl = shl i32 %a32, 5
+ %cmp = icmp ugt i32 %shl, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmpWithSignExtend8_32(i8 %a, i32 %b) {
+; CHECK-LABEL testSwapCmpWithSignExtend8_32
+; CHECK: cmp w1, w0, sxtb
+; CHECK-NEXT: cset w0, lo
+entry:
+ %a32 = sext i8 %a to i32
+ %cmp = icmp ugt i32 %a32, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmnWithLSL64_1(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithLSL64_1:
+; CHECK: cmn x1, x0, lsl #1
+; CHECK-NEXT: cset w0, ne
+entry:
+ %shl = shl i64 %a, 1
+ %na = sub i64 0, %shl
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+; Note: testing with a 62 bits shift as 63 has another optimization kicking in.
+define i1 @testSwapCmnWithLSL64_62(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithLSL64_62:
+; CHECK: cmn x1, x0, lsl #62
+; CHECK-NEXT: cset w0, ne
+entry:
+ %shl = shl i64 %a, 62
+ %na = sub i64 0, %shl
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+; Note: the 63 bits shift triggers a different optimization path, which leads
+; to a similar result in terms of performances. We try to catch here any change
+; so that this test can be adapted should the optimization be done with the
+; operand swap.
+define i1 @testSwapCmnWithLSL64_63(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithLSL64_63:
+; CHECK: cmp x1, x0, lsl #63
+; CHECK-NEXT: cset w0, ne
+entry:
+ %shl = shl i64 %a, 63
+ %na = sub i64 0, %shl
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmnWithLSL32_1(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithLSL32_1:
+; CHECK: cmn w1, w0, lsl #1
+; CHECK-NEXT: cset w0, ne
+entry:
+ %shl = shl i32 %a, 1
+ %na = sub i32 0, %shl
+ %cmp = icmp ne i32 %na, %b
+ ret i1 %cmp
+}
+
+; Note: testing with a 30 bits shift as 31 has another optimization kicking in.
+define i1 @testSwapCmnWithLSL32_30(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithLSL32_30:
+; CHECK: cmn w1, w0, lsl #30
+; CHECK-NEXT: cset w0, ne
+entry:
+ %shl = shl i32 %a, 30
+ %na = sub i32 0, %shl
+ %cmp = icmp ne i32 %na, %b
+ ret i1 %cmp
+}
+
+; Note: the 31 bits shift triggers a different optimization path, which leads
+; to a similar result in terms of performances. We try to catch here any change
+; so that this test can be adapted should the optimization be done with the
+; operand swap.
+define i1 @testSwapCmnWithLSL32_31(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithLSL32_31:
+; CHECK: cmp w1, w0, lsl #31
+; CHECK-NEXT: cset w0, ne
+entry:
+ %shl = shl i32 %a, 31
+ %na = sub i32 0, %shl
+ %cmp = icmp ne i32 %na, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmnWithLSR64_1(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithLSR64_1:
+; CHECK: cmn x1, x0, lsr #1
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = lshr i64 %a, 1
+ %na = sub i64 0, %lshr
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+; Note: testing with a 62 bits shift as 63 has another optimization kicking in.
+define i1 @testSwapCmnWithLSR64_62(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithLSR64_62:
+; CHECK: cmn x1, x0, lsr #62
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = lshr i64 %a, 62
+ %na = sub i64 0, %lshr
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+; Note: the 63 bits shift triggers a different optimization path, which leads
+; to a similar result in terms of performances. We try to catch here any change
+; so that this test can be adapted should the optimization be done with the
+; operand swap.
+define i1 @testSwapCmnWithLSR64_63(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithLSR64_63:
+; CHECK: cmp x1, x0, asr #63
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = lshr i64 %a, 63
+ %na = sub i64 0, %lshr
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmnWithLSR32_1(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithLSR32_1:
+; CHECK: cmn w1, w0, lsr #1
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = lshr i32 %a, 1
+ %na = sub i32 0, %lshr
+ %cmp = icmp ne i32 %na, %b
+ ret i1 %cmp
+}
+
+; Note: testing with a 30 bits shift as 31 has another optimization kicking in.
+define i1 @testSwapCmnWithLSR32_30(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithLSR32_30:
+; CHECK: cmn w1, w0, lsr #30
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = lshr i32 %a, 30
+ %na = sub i32 0, %lshr
+ %cmp = icmp ne i32 %na, %b
+ ret i1 %cmp
+}
+
+; Note: the 31 bits shift triggers a different optimization path, which leads
+; to a similar result in terms of performances. We try to catch here any change
+; so that this test can be adapted should the optimization be done with the
+; operand swap.
+define i1 @testSwapCmnWithLSR32_31(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithLSR32_31:
+; CHECK: cmp w1, w0, asr #31
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = lshr i32 %a, 31
+ %na = sub i32 0, %lshr
+ %cmp = icmp ne i32 %na, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmnWithASR64_1(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithASR64_1:
+; CHECK: cmn x1, x0, asr #3
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = ashr i64 %a, 3
+ %na = sub i64 0, %lshr
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+; Note: testing with a 62 bits shift as 63 has another optimization kicking in.
+define i1 @testSwapCmnWithASR64_62(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithASR64_62:
+; CHECK: cmn x1, x0, asr #62
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = ashr i64 %a, 62
+ %na = sub i64 0, %lshr
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+; Note: the 63 bits shift triggers a different optimization path, which leads
+; to a similar result in terms of performances. We try to catch here any change
+; so that this test can be adapted should the optimization be done with the
+; operand swap.
+define i1 @testSwapCmnWithASR64_63(i64 %a, i64 %b) {
+; CHECK-LABEL testSwapCmnWithASR64_63:
+; CHECK: cmp x1, x0, lsr #63
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = ashr i64 %a, 63
+ %na = sub i64 0, %lshr
+ %cmp = icmp ne i64 %na, %b
+ ret i1 %cmp
+}
+
+define i1 @testSwapCmnWithASR32_1(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithASR32_1:
+; CHECK: cmn w1, w0, asr #1
+; CHECK-NEXT: cset w0, eq
+entry:
+ %lshr = ashr i32 %a, 1
+ %na = sub i32 0, %lshr
+ %cmp = icmp eq i32 %na, %b
+ ret i1 %cmp
+}
+
+; Note: testing with a 30 bits shift as 31 has another optimization kicking in.
+define i1 @testSwapCmnWithASR32_30(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithASR32_30:
+; CHECK: cmn w1, w0, asr #30
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = ashr i32 %a, 30
+ %na = sub i32 0, %lshr
+ %cmp = icmp ne i32 %na, %b
+ ret i1 %cmp
+}
+
+; Note: the 31 bits shift triggers a different optimization path, which leads
+; to a similar result in terms of performances. We try to catch here any change
+; so that this test can be adapted should the optimization be done with the
+; operand swap.
+define i1 @testSwapCmnWithASR32_31(i32 %a, i32 %b) {
+; CHECK-LABEL testSwapCmnWithASR32_31:
+; CHECK: cmp w1, w0, lsr #31
+; CHECK-NEXT: cset w0, ne
+entry:
+ %lshr = ashr i32 %a, 31
+ %na = sub i32 0, %lshr
+ %cmp = icmp ne i32 %na, %b
+ ret i1 %cmp
+}
+
+define i64 @testSwapCmpToCmnWithZeroExtend(i32 %a32, i16 %a16, i8 %a8, i64 %b64, i32 %b32) {
+; CHECK-LABEL testSwapCmpToCmnWithZeroExtend:
+t0:
+ %conv0 = zext i32 %a32 to i64
+ %shl0 = shl i64 %conv0, 1
+ %na0 = sub i64 0, %shl0
+ %cmp0 = icmp ne i64 %na0, %b64
+; CHECK: cmn x3, w0, uxtw #1
+ br i1 %cmp0, label %t1, label %end
+
+t1:
+ %conv1 = zext i16 %a16 to i64
+ %shl1 = shl i64 %conv1, 4
+ %na1 = sub i64 0, %shl1
+ %cmp1 = icmp ne i64 %na1, %b64
+; CHECK: cmn x3, w1, uxth #4
+ br i1 %cmp1, label %t2, label %end
+
+t2:
+ %conv2 = zext i8 %a8 to i64
+ %shl2 = shl i64 %conv2, 3
+ %na2 = sub i64 0, %shl2
+ %cmp2 = icmp ne i64 %na2, %b64
+; CHECK: cmn x3, w2, uxtb #3
+ br i1 %cmp2, label %t3, label %end
+
+t3:
+ %conv3 = zext i16 %a16 to i32
+ %shl3 = shl i32 %conv3, 2
+ %na3 = sub i32 0, %shl3
+ %cmp3 = icmp ne i32 %na3, %b32
+; CHECK: cmn w4, w1, uxth #2
+ br i1 %cmp3, label %t4, label %end
+
+t4:
+ %conv4 = zext i8 %a8 to i32
+ %shl4 = shl i32 %conv4, 1
+ %na4 = sub i32 0, %shl4
+ %cmp4 = icmp ne i32 %na4, %b32
+; CHECK: cmn w4, w2, uxtb #1
+ br i1 %cmp4, label %t5, label %end
+
+t5:
+ %conv5 = zext i8 %a8 to i32
+ %shl5 = shl i32 %conv5, 5
+ %na5 = sub i32 0, %shl5
+ %cmp5 = icmp ne i32 %na5, %b32
+; CHECK: and [[REG:w[0-9]+]], w2, #0xff
+; CHECK: cmn w4, [[REG]], lsl #5
+ br i1 %cmp5, label %t6, label %end
+
+t6:
+ %conv6 = zext i8 %a8 to i32
+ %na6 = sub i32 0, %conv6
+ %cmp6 = icmp ne i32 %na6, %b32
+; CHECK: cmn w4, w2, uxtb
+ br i1 %cmp6, label %t7, label %end
+
+t7:
+ ret i64 0
+
+end:
+ ret i64 1
+}
+define i64 @testSwapCmpToCmnWithSignExtend(i32 %a32, i16 %a16, i8 %a8, i64 %b64, i32 %b32) {
+; CHECK-LABEL testSwapCmpToCmnWithSignExtend:
+t0:
+ %conv0 = sext i32 %a32 to i64
+ %shl0 = shl i64 %conv0, 1
+ %na0 = sub i64 0, %shl0
+ %cmp0 = icmp ne i64 %na0, %b64
+; CHECK: cmn x3, w0, sxtw #1
+ br i1 %cmp0, label %t1, label %end
+
+t1:
+ %conv1 = sext i16 %a16 to i64
+ %shl1 = shl i64 %conv1, 4
+ %na1 = sub i64 0, %shl1
+ %cmp1 = icmp ne i64 %na1, %b64
+; CHECK: cmn x3, w1, sxth #4
+ br i1 %cmp1, label %t2, label %end
+
+t2:
+ %conv2 = sext i8 %a8 to i64
+ %shl2 = shl i64 %conv2, 3
+ %na2 = sub i64 0, %shl2
+ %cmp2 = icmp ne i64 %na2, %b64
+; CHECK: cmn x3, w2, sxtb #3
+ br i1 %cmp2, label %t3, label %end
+
+t3:
+ %conv3 = sext i16 %a16 to i32
+ %shl3 = shl i32 %conv3, 2
+ %na3 = sub i32 0, %shl3
+ %cmp3 = icmp ne i32 %na3, %b32
+; CHECK: cmn w4, w1, sxth #2
+ br i1 %cmp3, label %t4, label %end
+
+t4:
+ %conv4 = sext i8 %a8 to i32
+ %shl4 = shl i32 %conv4, 1
+ %na4 = sub i32 0, %shl4
+ %cmp4 = icmp ne i32 %na4, %b32
+; CHECK: cmn w4, w2, sxtb #1
+ br i1 %cmp4, label %t5, label %end
+
+t5:
+ %conv5 = sext i8 %a8 to i32
+ %shl5 = shl i32 %conv5, 5
+ %na5 = sub i32 0, %shl5
+ %cmp5 = icmp ne i32 %na5, %b32
+; CHECK: sxtb [[REG:w[0-9]+]], w2
+; CHECK: cmn w4, [[REG]], lsl #5
+ br i1 %cmp5, label %t6, label %end
+
+t6:
+ %conv6 = sext i8 %a8 to i32
+ %na6 = sub i32 0, %conv6
+ %cmp6 = icmp ne i32 %na6, %b32
+; CHECK: cmn w4, w2, sxtb
+ br i1 %cmp6, label %t7, label %end
+
+t7:
+ ret i64 0
+
+end:
+ ret i64 1
+}