[llvm] cabfcf8 - [ARM] Fix conditions for lowering to S[LR]I
Petre-Ionut Tudor via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 17 09:21:24 PDT 2020
Author: Petre-Ionut Tudor
Date: 2020-04-17T17:19:24+01:00
New Revision: cabfcf840a9d15d92466c6774953d3aa399cde92
URL: https://github.com/llvm/llvm-project/commit/cabfcf840a9d15d92466c6774953d3aa399cde92
DIFF: https://github.com/llvm/llvm-project/commit/cabfcf840a9d15d92466c6774953d3aa399cde92.diff
LOG: [ARM] Fix conditions for lowering to S[LR]I
Summary:
Fixed the incorrect conditions for generating (S[LR]I X, Y, C2) from
(or (and X, BvecC1), (lsl Y, C2)), and added the AArch64ISD nodes VSLI and
VSRI that the pattern and the corresponding intrinsics now lower to. The
optimisation is now enabled by default.
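For illustration, here is a minimal standalone sketch (not part of the
patch) of the corrected mask arithmetic, using 8-bit elements and a shift
amount C2 = 3 to match the updated test vectors; the variable names and the
main() wrapper are purely illustrative:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned ElemSizeInBits = 8;
  const unsigned C2 = 3;
  const uint64_t Ones = (1u << ElemSizeInBits) - 1; // 0xFF for i8 elements

  // SLI keeps the low C2 bits of the destination, so the AND mask C1 must
  // equal Ones(ElemSizeInBits) >> (ElemSizeInBits - C2).
  const uint64_t SliMask = (Ones >> (ElemSizeInBits - C2)) & Ones;
  assert(SliMask == 7); // the <i8 7, ...> mask in testLeftGood

  // SRI keeps the high C2 bits of the destination, so the AND mask C1 must
  // equal Ones(ElemSizeInBits) << (ElemSizeInBits - C2).
  const uint64_t SriMask = (Ones << (ElemSizeInBits - C2)) & Ones;
  assert(SriMask == 224); // the <i8 224, ...> mask in testRightGood

  return 0;
}

The old check compared C1 against ~C2 (the bitwise complement of the shift
amount), which does not describe the bits SLI/SRI actually preserve; the two
expressions above are the conditions the updated tryLowerToSLI now enforces.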
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D77387
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f285b4b6ecf6..a52dd21d44b1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -99,11 +99,6 @@ STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
-static cl::opt<bool>
-EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
-
// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
@@ -1323,6 +1318,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
+ case AArch64ISD::VSLI: return "AArch64ISD::VSLI";
+ case AArch64ISD::VSRI: return "AArch64ISD::VSRI";
case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
@@ -3145,6 +3142,23 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
+
+ case Intrinsic::aarch64_neon_vsri:
+ case Intrinsic::aarch64_neon_vsli: {
+ EVT Ty = Op.getValueType();
+
+ if (!Ty.isVector())
+ report_fatal_error("Unexpected type for aarch64_neon_vsli");
+
+ uint64_t ShiftAmount = Op.getConstantOperandVal(3);
+ unsigned ElemSizeInBits = Ty.getScalarSizeInBits();
+ assert(ShiftAmount <= ElemSizeInBits);
+
+ bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
+ unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
+ return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
+ Op.getOperand(3));
+ }
}
}
@@ -7893,8 +7907,9 @@ static unsigned getIntrinsicID(const SDNode *N) {
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
-// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
-// Also, logical shift right -> sri, with the same structure.
+// BUILD_VECTORs with constant element C1, C2 is a constant, and:
+// - for the SLI case: C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2)
+// - for the SRI case: C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2)
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
@@ -7927,25 +7942,27 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
if (!isAllConstantBuildVector(And.getOperand(1), C1))
return SDValue();
- // Is C1 == ~C2, taking into account how much one can shift elements of a
- // particular size?
+ // Is C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2) or
+ // C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2), taking into account
+ // how much one can shift elements of a particular size?
uint64_t C2 = C2node->getZExtValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)
return SDValue();
unsigned ElemMask = (1 << ElemSizeInBits) - 1;
- if ((C1 & ElemMask) != (~C2 & ElemMask))
- return SDValue();
+ if (IsShiftRight) {
+ if ((C1 & ElemMask) != ((ElemMask << (ElemSizeInBits - C2)) & ElemMask))
+ return SDValue();
+ } else {
+ if ((C1 & ElemMask) != ((ElemMask >> (ElemSizeInBits - C2)) & ElemMask))
+ return SDValue();
+ }
SDValue X = And.getOperand(0);
SDValue Y = Shift.getOperand(0);
- unsigned Intrin =
- IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
- SDValue ResultSLI =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
- Shift.getOperand(1));
+ unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
+ SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
LLVM_DEBUG(N->dump(&DAG));
@@ -7959,10 +7976,8 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
- if (EnableAArch64SlrGeneration) {
- if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
- return Res;
- }
+ if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
+ return Res;
EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index a1405335b904..3b3fb40301e4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -121,6 +121,10 @@ enum NodeType : unsigned {
SRSHR_I,
URSHR_I,
+ // Vector shift by constant and insert
+ VSLI,
+ VSRI,
+
// Vector comparisons
CMEQ,
CMGE,
@@ -196,8 +200,10 @@ enum NodeType : unsigned {
UMULL,
// Reciprocal estimates and steps.
- FRECPE, FRECPS,
- FRSQRTE, FRSQRTS,
+ FRECPE,
+ FRECPS,
+ FRSQRTE,
+ FRSQRTS,
SUNPKHI,
SUNPKLO,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f634642a3b59..a0b53eacbc55 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -231,6 +231,10 @@ def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
+ SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
@@ -469,6 +473,8 @@ def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
+def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
+def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
@@ -5828,8 +5834,8 @@ defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
-defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
-def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
+def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
(i32 vecshiftL64:$imm))),
(SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
@@ -5842,8 +5848,8 @@ defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
int_aarch64_neon_sqshrun>;
-defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
-def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
+def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
(i32 vecshiftR64:$imm))),
(SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
index b26542d759e4..204e857e45c8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -aarch64-shift-insert-generation=true -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
; CHECK-LABEL: testLeftGood:
; CHECK: sli.16b v0, v1, #3
- %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
+ %and.i = and <16 x i8> %src1, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%result = or <16 x i8> %and.i, %vshl_n
store <16 x i8> %result, <16 x i8>* %dest, align 16
@@ -23,7 +23,7 @@ define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nou
define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
; CHECK-LABEL: testRightGood:
; CHECK: sri.16b v0, v1, #3
- %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
+ %and.i = and <16 x i8> %src1, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
%vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%result = or <16 x i8> %and.i, %vshl_n
store <16 x i8> %result, <16 x i8>* %dest, align 16