[llvm] cd3d7bf - [AArch64][SVE] Add DAG-Combine to push bitcasts from floating point loads after DUPLANE128
Matt Devereau via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 21 04:00:33 PDT 2022
Author: Matt Devereau
Date: 2022-07-21T11:00:10Z
New Revision: cd3d7bf15d3e64ca2572c1711df2b849cada8d3b
URL: https://github.com/llvm/llvm-project/commit/cd3d7bf15d3e64ca2572c1711df2b849cada8d3b
DIFF: https://github.com/llvm/llvm-project/commit/cd3d7bf15d3e64ca2572c1711df2b849cada8d3b.diff
LOG: [AArch64][SVE] Add DAG-Combine to push bitcasts from floating point loads after DUPLANE128
This patch lowers
duplane128(insert_subvector(undef, bitcast(op(128bitsubvec)), 0), 0)
to
bitcast(duplane128(insert_subvector(undef, op(128bitsubvec), 0), 0)).
This enables floating-point loads to match patterns added in
https://reviews.llvm.org/D130010
Differential Revision: https://reviews.llvm.org/D130013
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-ld1r.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cb20554e6a6b..52f026456f02 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19256,6 +19256,41 @@ static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
}
+// Lower duplane128(insert_subvector(undef, bitcast(op(128bitsubvec)), 0), 0)
+// to bitcast(duplane128(insert_subvector(undef, op(128bitsubvec), 0), 0)).
+// Pushing the bitcast past the DUPLANE128 lets 128-bit floating-point loads
+// match the LD1RQ patterns added in D130010.
+static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+
+  // The DUPLANE128 source must be a subvector inserted into undef.
+  SDValue Insert = N->getOperand(0);
+  if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR)
+    return SDValue();
+
+  if (!Insert.getOperand(0).isUndef())
+    return SDValue();
+
+  // The insert index and the duplicated-lane index must agree, otherwise
+  // the rewrite would duplicate a different 128-bit chunk.
+  uint64_t IdxInsert = Insert.getConstantOperandVal(2);
+  uint64_t IdxDupLane = N->getConstantOperandVal(1);
+  if (IdxInsert != IdxDupLane)
+    return SDValue();
+
+  // Only fire when the inserted value is itself a bitcast ...
+  SDValue Bitcast = Insert.getOperand(1);
+  if (Bitcast.getOpcode() != ISD::BITCAST)
+    return SDValue();
+
+  // ... of a 128-bit fixed-length subvector.
+  SDValue Subvec = Bitcast.getOperand(0);
+  EVT SubvecVT = Subvec.getValueType();
+  if (!SubvecVT.is128BitVector())
+    return SDValue();
+  // Reuse the cached SubvecVT rather than re-querying the value type.
+  EVT NewSubvecVT = getPackedSVEVectorVT(SubvecVT.getVectorElementType());
+
+  // Rebuild the sequence with the bitcast hoisted past the DUPLANE128.
+  SDLoc DL(N);
+  SDValue NewInsert =
+      DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT,
+                  DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
+  SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT,
+                                      NewInsert, N->getOperand(1));
+  return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -19342,6 +19377,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
return performDUPCombine(N, DCI);
+ case AArch64ISD::DUPLANE128:
+ return performDupLane128Combine(N, DAG);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
diff --git a/llvm/test/CodeGen/AArch64/sve-ld1r.ll b/llvm/test/CodeGen/AArch64/sve-ld1r.ll
index eafa10754b44..ce019e5d220c 100644
--- a/llvm/test/CodeGen/AArch64/sve-ld1r.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld1r.ll
@@ -726,8 +726,8 @@ define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_down(double* %valp)
define <vscale x 2 x double> @dupq_ld1rqd_f64(<2 x double>* %a) {
; CHECK-LABEL: dupq_ld1rqd_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: mov z0.q, q0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1rqd { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%1 = load <2 x double>, <2 x double>* %a
%2 = tail call fast <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> %1, i64 0)
@@ -738,8 +738,8 @@ define <vscale x 2 x double> @dupq_ld1rqd_f64(<2 x double>* %a) {
define <vscale x 4 x float> @dupq_ld1rqw_f32(<4 x float>* %a) {
; CHECK-LABEL: dupq_ld1rqw_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: mov z0.q, q0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1rqw { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%1 = load <4 x float>, <4 x float>* %a
%2 = tail call fast <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %1, i64 0)
@@ -750,8 +750,8 @@ define <vscale x 4 x float> @dupq_ld1rqw_f32(<4 x float>* %a) {
define <vscale x 8 x half> @dupq_ld1rqh_f16(<8 x half>* %a) {
; CHECK-LABEL: dupq_ld1rqh_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: mov z0.q, q0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1rqh { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%1 = load <8 x half>, <8 x half>* %a
%2 = tail call fast <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> %1, i64 0)
@@ -762,8 +762,8 @@ define <vscale x 8 x half> @dupq_ld1rqh_f16(<8 x half>* %a) {
define <vscale x 8 x bfloat> @dupq_ld1rqh_bf16(<8 x bfloat>* %a) #0 {
; CHECK-LABEL: dupq_ld1rqh_bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: mov z0.q, q0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1rqh { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%1 = load <8 x bfloat>, <8 x bfloat>* %a
%2 = tail call fast <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> %1, i64 0)
More information about the llvm-commits
mailing list