[PATCH] D118584: [AArch64] Combine ISD::AND into AArch64ISD::ANDS
Dave Green via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 31 00:55:18 PST 2022
dmgreen created this revision.
dmgreen added reviewers: SjoerdMeijer, samtebbs, jaykang10, t.p.northover, labrinea.
Herald added subscribers: pengfei, hiraditya, kristof.beyls.
dmgreen requested review of this revision.
Herald added a project: LLVM.
If we already have an AArch64ISD::ANDS node with identical operands, we can merge any ISD::AND into it, reducing the instruction count by calculating the value and the flags in a single operation. This code is taken from the X86 backend, and could also handle AArch64ISD::ADDS and AArch64ISD::SUBS, but I couldn't find any test cases where it came up.
https://reviews.llvm.org/D118584
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/peephole-and-tst.ll
Index: llvm/test/CodeGen/AArch64/peephole-and-tst.ll
===================================================================
--- llvm/test/CodeGen/AArch64/peephole-and-tst.ll
+++ llvm/test/CodeGen/AArch64/peephole-and-tst.ll
@@ -126,8 +126,7 @@
define i64 @test_and1(i64 %x, i64 %y) {
; CHECK-LABEL: test_and1:
; CHECK: // %bb.0:
-; CHECK-NEXT: and x8, x0, #0x3
-; CHECK-NEXT: tst x0, #0x3
+; CHECK-NEXT: ands x8, x0, #0x3
; CHECK-NEXT: csel x0, x8, x1, eq
; CHECK-NEXT: ret
%a = and i64 %x, 3
@@ -151,21 +150,19 @@
define i64 @test_and3(i64 %x, i64 %y) {
; CHECK-LABEL: test_and3:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: mov x19, x1
; CHECK-NEXT: mov x20, x0
-; CHECK-NEXT: and x21, x0, #0x3
; CHECK-NEXT: bl callee
-; CHECK-NEXT: tst x20, #0x3
-; CHECK-NEXT: csel x0, x21, x19, eq
+; CHECK-NEXT: ands x8, x20, #0x3
+; CHECK-NEXT: csel x0, x8, x19, eq
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret
%a = and i64 %x, 3
%b = call i64 @callee()
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14078,6 +14078,31 @@
return SDValue();
}
+// Combines for S forms of generic opcodes (AArch64ISD::ANDS into ISD::AND for
+// example). NOTE: This could be used for ADDS and SUBS too, if we can find test
+// cases.
+static SDValue performANDSCombine(SDNode *N, unsigned GenericOpc,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // If the flags result (value 1) is unused, fall back to the generic opcode.
+ if (!N->hasAnyUseOfValue(1)) {
+ SDValue Res = DCI.DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
+ return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
+ DL);
+ }
+
+ // Replace an existing identical generic node with this node's value result.
+ if (SDNode *GenericAddSub = DCI.DAG.getNodeIfExists(
+ GenericOpc, DCI.DAG.getVTList(VT), {LHS, RHS}))
+ DCI.CombineTo(GenericAddSub, SDValue(N, 0));
+
+ return SDValue();
+}
+
// Attempt to form urhadd(OpA, OpB) from
// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
@@ -17671,6 +17696,8 @@
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
+ case AArch64ISD::ANDS:
+ return performANDSCombine(N, ISD::AND, DCI);
case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);
case AArch64ISD::NVCAST:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D118584.404436.patch
Type: text/x-patch
Size: 3356 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220131/6f64aaea/attachment.bin>
More information about the llvm-commits
mailing list