[llvm] [AArch64] - Fold and and cmp into tst (PR #110347)
Jorge Botto via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 17:48:18 PDT 2024
https://github.com/jf-botto updated https://github.com/llvm/llvm-project/pull/110347
>From 1393eb5ea77c5bdf48da116d5e6a2ba31b69b9e6 Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Fri, 27 Sep 2024 17:44:23 +0100
Subject: [PATCH 1/2] precommit test
---
llvm/test/CodeGen/AArch64/pr102703.ll | 80 +++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/pr102703.ll
diff --git a/llvm/test/CodeGen/AArch64/pr102703.ll b/llvm/test/CodeGen/AArch64/pr102703.ll
new file mode 100644
index 00000000000000..dfe1d0e264e880
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr102703.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O1 -mtriple=aarch64 | FileCheck %s
+
+define i1 @lt2_u8(i8 %0) {
+; CHECK-LABEL: lt2_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #2
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i8 %0, 2
+ ret i1 %2
+}
+
+define i1 @lt4_u8(i8 %0) {
+; CHECK-LABEL: lt4_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #4
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i8 %0, 4
+ ret i1 %2
+}
+
+define i1 @lt8_u8(i8 %0) {
+; CHECK-LABEL: lt8_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #8
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i8 %0, 8
+ ret i1 %2
+}
+
+define i1 @lt16_u8(i8 %0) {
+; CHECK-LABEL: lt16_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i8 %0, 16
+ ret i1 %2
+}
+
+define i1 @lt32_u8(i8 %0) {
+; CHECK-LABEL: lt32_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #32
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i8 %0, 32
+ ret i1 %2
+}
+
+define i1 @lt64_u8(i8 %0) {
+; CHECK-LABEL: lt64_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #64
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i8 %0, 64
+ ret i1 %2
+}
+
+; negative test: 3 is not a power of 2, so this compare cannot be folded into tst
+define i1 @lt3_u8(i8 %0) {
+; CHECK-LABEL: lt3_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #3
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %2 = icmp ult i8 %0, 3
+ ret i1 %2
+}
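
The checks above pin down the current lowering: the i8 argument is zero-extended with an and, compared against the constant with cmp, and the result materialised with a cset on the lo condition. The follow-up patch folds this into tst + cset, relying on the identity that an unsigned value is below 2^k exactly when its bits at positions k and above are all zero.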
>From c2352abab97fc56c6f74f9dde43ca9156a6ab751 Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Sat, 28 Sep 2024 01:48:02 +0100
Subject: [PATCH 2/2] Adding the missed optimisation
---
.../Target/AArch64/AArch64ISelLowering.cpp | 33 +++++++++++++++++++
llvm/test/CodeGen/AArch64/pr102703.ll | 30 +++++++----------
.../AArch64/signed-truncation-check.ll | 25 ++++++--------
3 files changed, 55 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4166d9bd22bc01..3f0282e8689dc4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4299,6 +4299,36 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1));
}
+// Converts SETCC (AND X Y) Z ULT -> SETCC (AND X (Y & ~(Z - 1))) 0 EQ when Z
+// is a power of 2. This is then lowered to ANDS X (Y & ~(Z - 1)), which
+// EmitComparison can emit as a single TST instruction.
+static void SimplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS,
+ SelectionDAG &DAG, const SDLoc DL) {
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETULT:
+ if (LHS.getOpcode() == ISD::AND) {
+ ConstantSDNode *LHSAndConst = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+ if (LHSAndConst && RHSConst && LHSAndConst->hasOneUse() &&
+ RHSConst->hasOneUse()) {
+ uint64_t LHSAndConstValue = LHSAndConst->getZExtValue();
+ uint64_t RHSConstValue = RHSConst->getZExtValue();
+ if (isPowerOf2_64(RHSConstValue)) {
+ uint64_t NewMaskValue = LHSAndConstValue & ~(RHSConstValue - 1);
+ LHS = DAG.getNode(
+ ISD::AND, DL, LHS.getValueType(), LHS.getOperand(0),
+ DAG.getConstant(NewMaskValue, DL, LHS.getValueType()));
+ RHS = DAG.getConstant(0, DL, RHS.getValueType());
+ CC = ISD::SETEQ;
+ }
+ }
+ }
+ break;
+ }
+}
+
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -10587,6 +10617,9 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
if (LHS.getValueType().isInteger()) {
+
+ SimplifySetCCIntoEq(CC, LHS, RHS, DAG, dl);
+
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(
LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
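
As a standalone illustration of the rewrite described in the comment above (not part of the patch, and the names are purely illustrative), this C++ sketch verifies the equivalence exhaustively over every i8 value and every power-of-two bound; the Mask constant stands in for the AND introduced by zero-extending i8:

  // (X & Mask) u< 2^K  <=>  (X & (Mask & ~(2^K - 1))) == 0, because the
  // masked value is below 2^K exactly when its bits at positions >= K
  // are all zero.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t Mask = 0xff; // models the zext-induced AND on an i8 value
    for (uint32_t X = 0; X < 256; ++X) {
      for (unsigned K = 1; K < 8; ++K) {
        uint32_t Bound = 1u << K;                        // power-of-two RHS
        bool CmpForm = (X & Mask) < Bound;               // and; cmp; cset lo
        bool TstForm = (X & (Mask & ~(Bound - 1))) == 0; // tst; cset eq
        assert(CmpForm == TstForm);
      }
    }
  }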
diff --git a/llvm/test/CodeGen/AArch64/pr102703.ll b/llvm/test/CodeGen/AArch64/pr102703.ll
index dfe1d0e264e880..dcf2d65ce27368 100644
--- a/llvm/test/CodeGen/AArch64/pr102703.ll
+++ b/llvm/test/CodeGen/AArch64/pr102703.ll
@@ -4,9 +4,8 @@
define i1 @lt2_u8(i8 %0) {
; CHECK-LABEL: lt2_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #2
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xfe
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 2
ret i1 %2
@@ -15,9 +14,8 @@ define i1 @lt2_u8(i8 %0) {
define i1 @lt4_u8(i8 %0) {
; CHECK-LABEL: lt4_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #4
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xfc
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 4
ret i1 %2
@@ -26,9 +24,8 @@ define i1 @lt4_u8(i8 %0) {
define i1 @lt8_u8(i8 %0) {
; CHECK-LABEL: lt8_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #8
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xf8
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 8
ret i1 %2
@@ -37,9 +34,8 @@ define i1 @lt8_u8(i8 %0) {
define i1 @lt16_u8(i8 %0) {
; CHECK-LABEL: lt16_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xf0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 16
ret i1 %2
@@ -48,9 +44,8 @@ define i1 @lt16_u8(i8 %0) {
define i1 @lt32_u8(i8 %0) {
; CHECK-LABEL: lt32_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #32
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xe0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 32
ret i1 %2
@@ -59,9 +54,8 @@ define i1 @lt32_u8(i8 %0) {
define i1 @lt64_u8(i8 %0) {
; CHECK-LABEL: lt64_u8:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #64
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xc0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 64
ret i1 %2
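
Each new TST immediate is the i8 zext mask with the bits below the bound cleared: 0xff & ~(2 - 1) = 0xfe, 0xff & ~(4 - 1) = 0xfc, 0xff & ~(8 - 1) = 0xf8, 0xff & ~(16 - 1) = 0xf0, 0xff & ~(32 - 1) = 0xe0, and 0xff & ~(64 - 1) = 0xc0. The lt3_u8 negative test is unchanged because 3 is not a power of 2.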
diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
index bb4df6d8935b1b..7c80f9320faec1 100644
--- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -287,9 +287,8 @@ define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: add_ultcmp_bad_i16_i8_add:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, %y
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -328,9 +327,8 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w0, #192
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -356,9 +354,8 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_bad_i16_i8_magic:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w0, #64
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -370,9 +367,8 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_bad_i16_i4:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w0, #8
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xfff0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 8 ; 1U << (4-1)
%tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4
@@ -384,9 +380,8 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
; CHECK-LABEL: add_ultcmp_bad_i24_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w0, #128
-; CHECK-NEXT: and w8, w8, #0xffffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xffff00
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i24 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8
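
The same mask arithmetic covers the wider types: the i16 compares against 256 use 0xffff & ~(256 - 1) = 0xff00, the i16 compare against 16 uses 0xffff & ~(16 - 1) = 0xfff0, and the i24 compare against 256 uses 0xffffff & ~(256 - 1) = 0xffff00, matching the updated CHECK lines.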