[llvm] bb18532 - [AArch64][SVE] Allow llvm.aarch64.sve.st2/3/4 with vectors of pointers.
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 18 12:51:45 PDT 2020
Author: Eli Friedman
Date: 2020-08-18T12:51:16-07:00
New Revision: bb18532399cf01c712e18f85ecb1cfb612d664ac
URL: https://github.com/llvm/llvm-project/commit/bb18532399cf01c712e18f85ecb1cfb612d664ac
DIFF: https://github.com/llvm/llvm-project/commit/bb18532399cf01c712e18f85ecb1cfb612d664ac.diff
LOG: [AArch64][SVE] Allow llvm.aarch64.sve.st2/3/4 with vectors of pointers.
This isn't necessaary for ACLE, but could be useful in other situations.
And the change is simple.
Differential Revision: https://reviews.llvm.org/D85251
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2cd2f6717120..b37947495a43 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9462,16 +9462,17 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
template <unsigned NumVecs>
-static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info,
- const CallInst &CI) {
+static bool
+setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
+ AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
Info.opc = ISD::INTRINSIC_VOID;
// Retrieve EC from first vector argument.
- const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType());
+ const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
// Check the assumption that all input vectors are the same type.
for (unsigned I = 0; I < NumVecs; ++I)
- assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) &&
+ assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
"Invalid type.");
#endif
// memVT is `NumVecs * VT`.
@@ -9494,11 +9495,11 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_sve_st2:
- return setInfoSVEStN<2>(Info, I);
+ return setInfoSVEStN<2>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st3:
- return setInfoSVEStN<3>(Info, I);
+ return setInfoSVEStN<3>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st4:
- return setInfoSVEStN<4>(Info, I);
+ return setInfoSVEStN<4>(*this, DL, Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index 92877233b2c9..f3fcddbaa2fc 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -108,6 +108,17 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
ret void
}
+define void @st2d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i1> %pred, i8** %addr) {
+; CHECK-LABEL: st2d_ptr:
+; CHECK: st2d { z0.d, z1.d }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*> %v0,
+ <vscale x 2 x i8*> %v1,
+ <vscale x 2 x i1> %pred,
+ i8** %addr)
+ ret void
+}
+
;
; ST3B
;
@@ -220,6 +231,18 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
ret void
}
+define void @st3d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i1> %pred, i8** %addr) {
+; CHECK-LABEL: st3d_ptr:
+; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*> %v0,
+ <vscale x 2 x i8*> %v1,
+ <vscale x 2 x i8*> %v2,
+ <vscale x 2 x i1> %pred,
+ i8** %addr)
+ ret void
+}
+
;
; ST4B
;
@@ -340,6 +363,18 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
ret void
}
+define void @st4d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i8*> %v3, <vscale x 2 x i1> %pred, i8** %addr) {
+; CHECK-LABEL: st4d_ptr:
+; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*> %v0,
+ <vscale x 2 x i8*> %v1,
+ <vscale x 2 x i8*> %v2,
+ <vscale x 2 x i8*> %v3,
+ <vscale x 2 x i1> %pred,
+ i8** %addr)
+ ret void
+}
;
; STNT1B
;
@@ -508,6 +543,7 @@ declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
declare void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+declare void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
@@ -517,6 +553,7 @@ declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
declare void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+declare void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
@@ -526,6 +563,7 @@ declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
declare void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+declare void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
More information about the llvm-commits
mailing list