[llvm] 0b6df40 - [AArch64] Combine ISD::AND into AArch64ISD::ANDS
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 17 02:44:17 PDT 2022
Author: David Green
Date: 2022-03-17T09:44:11Z
New Revision: 0b6df40c52acf8a1af7ad370a12521879e8ef96a
URL: https://github.com/llvm/llvm-project/commit/0b6df40c52acf8a1af7ad370a12521879e8ef96a
DIFF: https://github.com/llvm/llvm-project/commit/0b6df40c52acf8a1af7ad370a12521879e8ef96a.diff
LOG: [AArch64] Combine ISD::AND into AArch64ISD::ANDS
If we already have an AArch64ISD::ANDS node with identical operands, we
can merge any ISD::AND into it, reducing the instruction count by
calculating the value and the flags in a single operation. This code is
taken from the X86 backend, and could also handle AArch64ISD::ADDS and
AArch64ISD::SUBS, but I couldn't find any test cases where it came up.
Differential Revision: https://reviews.llvm.org/D118584
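For illustration, here is a minimal IR reproducer of the shape this combine targets, modeled on the test_and1 case in the diff below. The function name and the icmp/select lines are inferred from the CHECK output rather than copied from the test file, so treat this as a sketch:

define i64 @and_then_test(i64 %x, i64 %y) {
  %a = and i64 %x, 3                ; value use of x & 3
  %c = icmp eq i64 %a, 0            ; flags use of the same x & 3
  %r = select i1 %c, i64 %a, i64 %y
  ret i64 %r
}

Before this patch the value and the flags were computed separately (and x8, x0, #0x3 followed by tst x0, #0x3); with the combine a single ands x8, x0, #0x3 feeds both the csel operand and the condition, as the updated CHECK lines for test_and1 show.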
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/peephole-and-tst.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b538e6fff2861..9ebce1120da63 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17351,6 +17351,31 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }

+// Combines for S forms of generic opcodes (AArch64ISD::ANDS into ISD::AND for
+// example). NOTE: This could be used for ADDS and SUBS too, if we can find test
+// cases.
+static SDValue performANDSCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI) {
+  SDLoc DL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+
+  // If the flag result isn't used, convert back to a generic opcode.
+  if (!N->hasAnyUseOfValue(1)) {
+    SDValue Res = DCI.DAG.getNode(ISD::AND, DL, VT, LHS, RHS);
+    return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
+                                  DL);
+  }
+
+  // Combine identical generic nodes into this node, re-using the result.
+  if (SDNode *GenericAddSub =
+          DCI.DAG.getNodeIfExists(ISD::AND, DCI.DAG.getVTList(VT), {LHS, RHS}))
+    DCI.CombineTo(GenericAddSub, SDValue(N, 0));
+
+  return SDValue();
+}
+
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
// setcc_merge_zero pred
// (sign_extend (extract_subvector (setcc_merge_zero ... pred ...))), 0, ne
@@ -18415,6 +18440,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performTBZCombine(N, DCI, DAG);
   case AArch64ISD::CSEL:
     return performCSELCombine(N, DCI, DAG);
+  case AArch64ISD::ANDS:
+    return performANDSCombine(N, DCI);
   case AArch64ISD::DUP:
     return performPostLD1Combine(N, DCI, false);
   case AArch64ISD::NVCAST:
diff --git a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
index 696933b996a19..826aea204ef2f 100644
--- a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
@@ -126,8 +126,7 @@ do.end: ; preds = %4
define i64 @test_and1(i64 %x, i64 %y) {
; CHECK-LABEL: test_and1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x3
-; CHECK-NEXT: tst x0, #0x3
+; CHECK-NEXT: ands x8, x0, #0x3
; CHECK-NEXT: csel x0, x8, x1, eq
; CHECK-NEXT: ret
%a = and i64 %x, 3
@@ -151,22 +150,20 @@ define i64 @test_and2(i64 %x, i64 %y) {
define i64 @test_and3(i64 %x, i64 %y) {
; CHECK-LABEL: test_and3:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: mov x20, x0
-; CHECK-NEXT: and x21, x0, #0x3
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov x19, x1
; CHECK-NEXT: bl callee
-; CHECK-NEXT: tst x20, #0x3
-; CHECK-NEXT: csel x0, x21, x19, eq
+; CHECK-NEXT: ands x8, x20, #0x3
+; CHECK-NEXT: csel x0, x8, x19, eq
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret
%a = and i64 %x, 3
%b = call i64 @callee(i64 0)
@@ -185,8 +182,7 @@ define i64 @test_and_4(i64 %x, i64 %y) {
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: ands x0, x0, #0x3
; CHECK-NEXT: bl callee
-; CHECK-NEXT: tst x19, #0x3
-; CHECK-NEXT: and x8, x19, #0x3
+; CHECK-NEXT: ands x8, x19, #0x3
; CHECK-NEXT: csel x0, x8, x0, eq
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
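A note on the test_and3 change above: there the AND result is live across a call, so reusing a single ands after the call also frees the callee-saved register x21 and shrinks the prologue/epilogue spill from an stp/ldp pair to a plain str/ldr of the link register. A sketch of that shape, with the icmp/select lines again assumed from the csel in the CHECK output rather than taken verbatim from the test:

declare i64 @callee(i64)

define i64 @and_live_across_call(i64 %x, i64 %y) {
  %a = and i64 %x, 3                ; result stays live across the call
  %b = call i64 @callee(i64 0)
  %c = icmp eq i64 %a, 0
  %r = select i1 %c, i64 %a, i64 %y
  ret i64 %r
}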