[PATCH] D114405: Optimize shift and accumulate pattern in AArch64.
Xin Tong via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 22 16:48:37 PST 2021
adriantong1024 created this revision.
adriantong1024 added reviewers: Carrot, paulwalker-arm.
Herald added subscribers: hiraditya, kristof.beyls.
adriantong1024 requested review of this revision.
Herald added a project: LLVM.
AArch64 supports unsigned shift right and accumulate (USRA). When we see an
unsigned shift right followed by an OR, we can turn them into a USRA
instruction, provided the operands of the OR have no common bits set.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D114405
Files:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/shift-accumulate.ll
Index: llvm/test/CodeGen/AArch64/shift-accumulate.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/shift-accumulate.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+; After the byte-wise lshr by 7, each byte of %2 holds only 0 or 1, so every
+; i16 lane of %3 can only have bits 0 and 8 set. Shifting that right by 7
+; leaves at most bit 1 set in %4, so %4 and %3 share no set bits and the `or`
+; below is equivalent to an add; the CHECK lines expect a usra for it.
+define dso_local i32 @usra(<16 x i8> %0) local_unnamed_addr #0 align 32 {
+; CHECK-LABEL: usra:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.16b, v0.16b, #7
+; CHECK-NEXT: usra v0.8h, v0.8h, #7
+ %2 = lshr <16 x i8> %0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %3 = bitcast <16 x i8> %2 to <8 x i16>
+ %4 = lshr <8 x i16> %3, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ %5 = or <8 x i16> %4, %3
+ %6 = bitcast <8 x i16> %5 to <16 x i8>
+ %7 = extractelement <16 x i8> %6, i32 0
+ %8 = zext i8 %7 to i32
+ ret i32 %8
+}
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1796,6 +1796,13 @@
Known = KnownBits::commonBits(Known, Known2);
break;
}
+ case AArch64ISD::VLSHR: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+ Known = KnownBits::lshr(Known, Known2);
+ break;
+ }
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())
Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -310,6 +310,7 @@
findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
const SDValue &OldBase, const SDValue &OldOffset,
unsigned Scale);
+ void optimizeShiftAccumulate(SDNode *N);
bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);
@@ -2730,6 +2731,31 @@
return false;
}
+// Rewrite an ISD::OR node in place as an ISD::ADD when its two operands are
+// known to share no set bits and at least one of them is an
+// AArch64ISD::VLSHR node. Nodes that do not match are left untouched.
+//
+// The intent is for the resulting ADD to be selected as a USRA instruction.
+void AArch64DAGToDAGISel::optimizeShiftAccumulate(SDNode *N) {
+  // OR and ADD only compute the same value when the operands are disjoint,
+  // so bail out unless that can be proven.
+  if (N->getOpcode() != ISD::OR ||
+      !CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1)))
+    return;
+
+  // The rewrite is only worthwhile if one of the operands is a VLSHR.
+  bool HasShiftOperand = false;
+  for (const SDValue &Operand : N->op_values())
+    HasShiftOperand |= Operand->getOpcode() == AArch64ISD::VLSHR;
+  if (!HasShiftOperand)
+    return;
+
+  // Keep the operand list and result type as they are; only the opcode changes.
+  SmallVector<SDValue, 3> Ops(N->op_begin(), N->op_end());
+  SDVTList VTs = CurDAG->getVTList(N->getValueType(0));
+  CurDAG->MorphNodeTo(N, ISD::ADD, VTs, Ops);
+}
+
bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
if (N->getOpcode() != ISD::OR)
return false;
@@ -3469,6 +3495,9 @@
case ISD::OR:
if (tryBitfieldInsertOp(Node))
return;
+
+ // See whether we can turn this OR into a shift-and-accumulate.
+ optimizeShiftAccumulate(Node);
break;
case ISD::EXTRACT_SUBVECTOR: {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D114405.389069.patch
Type: text/x-patch
Size: 3412 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211123/1bc0b813/attachment.bin>
More information about the llvm-commits
mailing list