[llvm] bb18532 - [AArch64][SVE] Allow llvm.aarch64.sve.st2/3/4 with vectors of pointers.

Tue Aug 18 12:51:45 PDT 2020

Author: Eli Friedman
Date: 2020-08-18T12:51:16-07:00
New Revision: bb18532399cf01c712e18f85ecb1cfb612d664ac

URL: https://github.com/llvm/llvm-project/commit/bb18532399cf01c712e18f85ecb1cfb612d664ac
DIFF: https://github.com/llvm/llvm-project/commit/bb18532399cf01c712e18f85ecb1cfb612d664ac.diff

LOG: [AArch64][SVE] Allow llvm.aarch64.sve.st2/3/4 with vectors of pointers.

This isn't necessaary for ACLE, but could be useful in other situations.
And the change is simple.

Differential Revision: https://reviews.llvm.org/D85251

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2cd2f6717120..b37947495a43 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9462,16 +9462,17 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
 
 /// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
 template <unsigned NumVecs>
-static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info,
-                          const CallInst &CI) {
+static bool
+setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
+              AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
   Info.opc = ISD::INTRINSIC_VOID;
   // Retrieve EC from first vector argument.
-  const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType());
+  const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
   ElementCount EC = VT.getVectorElementCount();
 #ifndef NDEBUG
   // Check the assumption that all input vectors are the same type.
   for (unsigned I = 0; I < NumVecs; ++I)
-    assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) &&
+    assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
            "Invalid type.");
 #endif
   // memVT is `NumVecs * VT`.
@@ -9494,11 +9495,11 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   auto &DL = I.getModule()->getDataLayout();
   switch (Intrinsic) {
   case Intrinsic::aarch64_sve_st2:
-    return setInfoSVEStN<2>(Info, I);
+    return setInfoSVEStN<2>(*this, DL, Info, I);
   case Intrinsic::aarch64_sve_st3:
-    return setInfoSVEStN<3>(Info, I);
+    return setInfoSVEStN<3>(*this, DL, Info, I);
   case Intrinsic::aarch64_sve_st4:
-    return setInfoSVEStN<4>(Info, I);
+    return setInfoSVEStN<4>(*this, DL, Info, I);
   case Intrinsic::aarch64_neon_ld2:
   case Intrinsic::aarch64_neon_ld3:
   case Intrinsic::aarch64_neon_ld4:

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index 92877233b2c9..f3fcddbaa2fc 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -108,6 +108,17 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
   ret void
 }
 
+define void @st2d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i1> %pred, i8** %addr) {
+; CHECK-LABEL: st2d_ptr:
+; CHECK: st2d { z0.d, z1.d }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*> %v0,
+                                           <vscale x 2 x i8*> %v1,
+                                           <vscale x 2 x i1> %pred,
+                                           i8** %addr)
+  ret void
+}
+
 ;
 ; ST3B
 ;
@@ -220,6 +231,18 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
   ret void
 }
 
+define void @st3d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i1> %pred, i8** %addr) {
+; CHECK-LABEL: st3d_ptr:
+; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*> %v0,
+                                           <vscale x 2 x i8*> %v1,
+                                           <vscale x 2 x i8*> %v2,
+                                           <vscale x 2 x i1> %pred,
+                                           i8** %addr)
+  ret void
+}
+
 ;
 ; ST4B
 ;
@@ -340,6 +363,18 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
   ret void
 }
 
+define void @st4d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i8*> %v3, <vscale x 2 x i1> %pred, i8** %addr) {
+; CHECK-LABEL: st4d_ptr:
+; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*> %v0,
+                                           <vscale x 2 x i8*> %v1,
+                                           <vscale x 2 x i8*> %v2,
+                                           <vscale x 2 x i8*> %v3,
+                                           <vscale x 2 x i1> %pred,
+                                           i8** %addr)
+  ret void
+}
 ;
 ; STNT1B
 ;
@@ -508,6 +543,7 @@ declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
 declare void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
 declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
 declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+declare void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
 
 declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
@@ -517,6 +553,7 @@ declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
 declare void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
 declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
 declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+declare void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
 
 declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
@@ -526,6 +563,7 @@ declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
 declare void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
 declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
 declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+declare void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
 
 declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)