r248888 - [ARM][NEON] Use address space in vld([1234]|[234]lane) and vst([1234]|[234]lane) instructions
Jeroen Ketema via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 30 03:56:56 PDT 2015
Author: jketema
Date: Wed Sep 30 05:56:56 2015
New Revision: 248888
URL: http://llvm.org/viewvc/llvm-project?rev=248888&view=rev
Log:
[ARM][NEON] Use address space in vld([1234]|[234]lane) and vst([1234]|[234]lane) instructions
This is the clang commit associated with llvm r248887.
This commit changes the interface of the vld[1234], vld[234]lane, and vst[1234],
vst[234]lane ARM NEON intrinsics and associates an address space with the
pointer that these intrinsics take. This changes, e.g.,
<2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
to
<2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8*, i32)
This change ensures that address spaces are fully taken into account in the ARM
target during lowering of interleaved loads and stores.
Differential Revision: http://reviews.llvm.org/D13127
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/arm-neon-misc.c
cfe/trunk/test/CodeGen/arm-vector-align.c
cfe/trunk/test/CodeGen/vld_dup.c
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=248888&r1=248887&r2=248888&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Sep 30 05:56:56 2015
@@ -2895,16 +2895,19 @@ Value *CodeGenFunction::EmitCommonNeonBu
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
}
case NEON::BI__builtin_neon_vld1_v:
- case NEON::BI__builtin_neon_vld1q_v:
+ case NEON::BI__builtin_neon_vld1q_v: {
+ llvm::Type *Tys[] = {Ty, Int8PtrTy};
Ops.push_back(getAlignmentValue32(PtrOp0));
- return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1");
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
+ }
case NEON::BI__builtin_neon_vld2_v:
case NEON::BI__builtin_neon_vld2q_v:
case NEON::BI__builtin_neon_vld3_v:
case NEON::BI__builtin_neon_vld3q_v:
case NEON::BI__builtin_neon_vld4_v:
case NEON::BI__builtin_neon_vld4q_v: {
- Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
+ llvm::Type *Tys[] = {Ty, Int8PtrTy};
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
Value *Align = getAlignmentValue32(PtrOp1);
Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
@@ -2927,7 +2930,8 @@ Value *CodeGenFunction::EmitCommonNeonBu
case NEON::BI__builtin_neon_vld3q_lane_v:
case NEON::BI__builtin_neon_vld4_lane_v:
case NEON::BI__builtin_neon_vld4q_lane_v: {
- Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
+ llvm::Type *Tys[] = {Ty, Int8PtrTy};
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
for (unsigned I = 2; I < Ops.size() - 1; ++I)
Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
Ops.push_back(getAlignmentValue32(PtrOp1));
@@ -3046,9 +3050,11 @@ Value *CodeGenFunction::EmitCommonNeonBu
case NEON::BI__builtin_neon_vst3_lane_v:
case NEON::BI__builtin_neon_vst3q_lane_v:
case NEON::BI__builtin_neon_vst4_lane_v:
- case NEON::BI__builtin_neon_vst4q_lane_v:
+ case NEON::BI__builtin_neon_vst4q_lane_v: {
+ llvm::Type *Tys[] = {Int8PtrTy, Ty};
Ops.push_back(getAlignmentValue32(PtrOp0));
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
+ }
case NEON::BI__builtin_neon_vsubhn_v: {
llvm::VectorType *SrcTy =
llvm::VectorType::getExtendedElementVectorType(VTy);
@@ -3776,7 +3782,8 @@ Value *CodeGenFunction::EmitARMBuiltinEx
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
// Load the value as a one-element vector.
Ty = llvm::VectorType::get(VTy->getElementType(), 1);
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
+ llvm::Type *Tys[] = {Ty, Int8PtrTy};
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
Value *Align = getAlignmentValue32(PtrOp0);
Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
// Combine them.
@@ -3808,7 +3815,8 @@ Value *CodeGenFunction::EmitARMBuiltinEx
break;
default: llvm_unreachable("unknown vld_dup intrinsic?");
}
- Function *F = CGM.getIntrinsic(Int, Ty);
+ llvm::Type *Tys[] = {Ty, Int8PtrTy};
+ Function *F = CGM.getIntrinsic(Int, Tys);
llvm::Value *Align = getAlignmentValue32(PtrOp1);
Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
@@ -3827,7 +3835,8 @@ Value *CodeGenFunction::EmitARMBuiltinEx
break;
default: llvm_unreachable("unknown vld_dup intrinsic?");
}
- Function *F = CGM.getIntrinsic(Int, Ty);
+ llvm::Type *Tys[] = {Ty, Int8PtrTy};
+ Function *F = CGM.getIntrinsic(Int, Tys);
llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
SmallVector<Value*, 6> Args;
@@ -3902,8 +3911,9 @@ Value *CodeGenFunction::EmitARMBuiltinEx
Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
Ops[2] = getAlignmentValue32(PtrOp0);
+ llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
- Ops[1]->getType()), Ops);
+ Tys), Ops);
}
// fall through
case NEON::BI__builtin_neon_vst1_lane_v: {
Modified: cfe/trunk/test/CodeGen/arm-neon-misc.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-misc.c?rev=248888&r1=248887&r2=248888&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-misc.c (original)
+++ cfe/trunk/test/CodeGen/arm-neon-misc.c Wed Sep 30 05:56:56 2015
@@ -14,20 +14,20 @@
void t1(uint64_t *src, uint8_t *dst) {
// CHECK: @t1
uint64x2_t q = vld1q_u64(src);
-// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64
+// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8
vst1q_lane_u64(dst, q, 1);
// CHECK: bitcast <16 x i8> %{{.*}} to <2 x i64>
// CHECK: shufflevector <2 x i64>
-// CHECK: call void @llvm.arm.neon.vst1.v1i64
+// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64
}
void t2(uint64_t *src1, uint8_t *src2, uint64x2_t *dst) {
// CHECK: @t2
uint64x2_t q = vld1q_u64(src1);
-// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64
+// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8
q = vld1q_lane_u64(src2, q, 0);
// CHECK: shufflevector <2 x i64>
-// CHECK: call <1 x i64> @llvm.arm.neon.vld1.v1i64
+// CHECK: call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8
// CHECK: shufflevector <1 x i64>
*dst = q;
// CHECK: store <2 x i64>
Modified: cfe/trunk/test/CodeGen/arm-vector-align.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-vector-align.c?rev=248888&r1=248887&r2=248888&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-vector-align.c (original)
+++ cfe/trunk/test/CodeGen/arm-vector-align.c Wed Sep 30 05:56:56 2015
@@ -14,9 +14,9 @@
typedef float AlignedAddr __attribute__ ((aligned (16)));
void t1(AlignedAddr *addr1, AlignedAddr *addr2) {
// CHECK: @t1
-// CHECK: call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %{{.*}}, i32 16)
+// CHECK: call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %{{.*}}, i32 16)
float32x4_t a = vld1q_f32(addr1);
-// CHECK: call void @llvm.arm.neon.vst1.v4f32(i8* %{{.*}}, <4 x float> %{{.*}}, i32 16)
+// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %{{.*}}, <4 x float> %{{.*}}, i32 16)
vst1q_f32(addr2, a);
}
Modified: cfe/trunk/test/CodeGen/vld_dup.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/vld_dup.c?rev=248888&r1=248887&r2=248888&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/vld_dup.c (original)
+++ cfe/trunk/test/CodeGen/vld_dup.c Wed Sep 30 05:56:56 2015
@@ -14,7 +14,7 @@ int main(){
int64_t v7[4];
v1 = vld3_dup_s32(v0);
-// CHECK: [[T168:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
+// CHECK: [[T168:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
// CHECK-NEXT: [[T169:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T168]], 0
// CHECK-NEXT: [[T170:%.*]] = shufflevector <2 x i32> [[T169]], <2 x i32> [[T169]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[T171:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T168]], <2 x i32> [[T170]], 0
@@ -26,7 +26,7 @@ int main(){
// CHECK-NEXT: [[T177:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T174]], <2 x i32> [[T176]], 2
v3 = vld4_dup_s32(v2);
-// CHECK: [[T178:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
+// CHECK: [[T178:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
// CHECK-NEXT: [[T179:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T178]], 0
// CHECK-NEXT: [[T180:%.*]] = shufflevector <2 x i32> [[T179]], <2 x i32> [[T179]], <2 x i32> zeroinitializer
// CHECK-NEXT: [[T181:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T178]], <2 x i32> [[T180]], 0
@@ -41,10 +41,10 @@ int main(){
// CHECK-NEXT: [[T190:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T187]], <2 x i32> [[T189]], 3
v4 = vld3_dup_s64(v6);
-// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* {{.*}}, i32 {{[0-9]+}})
+// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* {{.*}}, i32 {{[0-9]+}})
v5 = vld4_dup_s64(v7);
-// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* {{.*}}, i32 {{[0-9]+}})
+// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* {{.*}}, i32 {{[0-9]+}})
return 0;
}
More information about the cfe-commits
mailing list