[llvm] r202247 - [SROA] Teach SROA how to handle pointers from address spaces other than

Chandler Carruth chandlerc at gmail.com
Wed Feb 26 00:25:02 PST 2014


Author: chandlerc
Date: Wed Feb 26 02:25:02 2014
New Revision: 202247

URL: http://llvm.org/viewvc/llvm-project?rev=202247&view=rev
Log:
[SROA] Teach SROA how to handle pointers from address spaces other than
the default.

Based on the patch by Matt Arsenault, D1764!

I switched one place to use the more direct pointer type to compute the
desired address space, and I reworked the memcpy rewriting section to
reflect significant refactorings that this patch helped inspire.

Thanks to several of the folks who helped review and improve the patch
as well.

Added:
    llvm/trunk/test/Transforms/SROA/address-spaces.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/SROA.cpp
    llvm/trunk/test/Transforms/SROA/basictest.ll
    llvm/trunk/test/Transforms/SROA/vector-promotion.ll

Modified: llvm/trunk/lib/Transforms/Scalar/SROA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SROA.cpp?rev=202247&r1=202246&r2=202247&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/SROA.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/SROA.cpp Wed Feb 26 02:25:02 2014
@@ -1401,7 +1401,7 @@ static Value *getNaturalGEPWithOffset(IR
 
   // Don't consider any GEPs through an i8* as natural unless the TargetTy is
   // an i8.
-  if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
+  if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
     return 0;
 
   Type *ElementTy = Ty->getElementType();
@@ -1503,8 +1503,9 @@ static Value *getAdjustedPtr(IRBuilderTy
 
   if (!OffsetPtr) {
     if (!Int8Ptr) {
-      Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
-                                  NamePrefix + "sroa_raw_cast");
+      Int8Ptr = IRB.CreateBitCast(
+          Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
+          NamePrefix + "sroa_raw_cast");
       Int8PtrOffset = Offset;
     }
 
@@ -2559,15 +2560,16 @@ private:
       Pass.Worklist.insert(AI);
     }
 
+    Type *OtherPtrTy = OtherPtr->getType();
+    unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
+
     // Compute the relative offset for the other pointer within the transfer.
-    unsigned IntPtrWidth = DL.getPointerSizeInBits();
+    unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
     APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
     unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1,
                                    OtherOffset.zextOrTrunc(64).getZExtValue());
 
     if (EmitMemCpy) {
-      Type *OtherPtrTy = OtherPtr->getType();
-
       // Compute the other pointer, folding as much as possible to produce
       // a single, simple GEP in most cases.
       OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
@@ -2594,16 +2596,19 @@ private:
     IntegerType *SubIntTy
       = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
 
-    Type *OtherPtrTy = NewAI.getType();
+    // Reset the other pointer type to match the register type we're going to
+    // use, but using the address space of the original other pointer.
     if (VecTy && !IsWholeAlloca) {
       if (NumElements == 1)
         OtherPtrTy = VecTy->getElementType();
       else
         OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
 
-      OtherPtrTy = OtherPtrTy->getPointerTo();
+      OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
     } else if (IntTy && !IsWholeAlloca) {
-      OtherPtrTy = SubIntTy->getPointerTo();
+      OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
+    } else {
+      OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
     }
 
     Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,

Added: llvm/trunk/test/Transforms/SROA/address-spaces.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/address-spaces.ll?rev=202247&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/address-spaces.ll (added)
+++ llvm/trunk/test/Transforms/SROA/address-spaces.ll Wed Feb 26 02:25:02 2014
@@ -0,0 +1,68 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
+
+
+; Make sure an illegal bitcast isn't introduced
+define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) {
+; CHECK-LABEL: @test_address_space_1_1(
+; CHECK: load <2 x i64> addrspace(1)* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) {
+; CHECK-LABEL: @test_address_space_1_0(
+; CHECK: load <2 x i64> addrspace(1)* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16* %b to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) {
+; CHECK-LABEL: @test_address_space_0_1(
+; CHECK: load <2 x i64>* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64>* %a to i8*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] }
+
+; Function Attrs: nounwind
+define void @copy_struct([5 x i64] %in.coerce) {
+; CHECK-LABEL: @copy_struct(
+; CHECK-NOT: memcpy
+for.end:
+  %in = alloca %struct.struct_test_27.0.13, align 8
+  %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]*
+  store [5 x i64] %in.coerce, [5 x i64]* %0, align 8
+  %scevgep9 = getelementptr %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0
+  %scevgep910 = bitcast i32* %scevgep9 to i8*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* undef, i8* %scevgep910, i32 16, i32 4, i1 false)
+  ret void
+}
+ 

Modified: llvm/trunk/test/Transforms/SROA/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/basictest.ll?rev=202247&r1=202246&r2=202247&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SROA/basictest.ll (original)
+++ llvm/trunk/test/Transforms/SROA/basictest.ll Wed Feb 26 02:25:02 2014
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 declare void @llvm.lifetime.start(i64, i8* nocapture)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
@@ -404,6 +404,7 @@ entry:
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
@@ -1149,6 +1150,24 @@ entry:
   ret void
 ; CHECK: ret
 }
+
+define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) {
+; Make sure this the right address space pointer is used for type check.
+; CHECK-LABEL: @PR14105_as1(
+
+entry:
+  %a = alloca { [16 x i8] }, align 8
+; CHECK: alloca [16 x i8], align 8
+
+  %gep = getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i64 -1
+; CHECK-NEXT: getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i64 0
+
+  %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)*
+  %cast2 = bitcast { [16 x i8 ] }* %a to i8*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
+  ret void
+; CHECK: ret
+}
 
 define void @PR14465() {
 ; Ensure that we don't crash when analyzing a alloca larger than the maximum

Modified: llvm/trunk/test/Transforms/SROA/vector-promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/vector-promotion.ll?rev=202247&r1=202246&r2=202247&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SROA/vector-promotion.ll (original)
+++ llvm/trunk/test/Transforms/SROA/vector-promotion.ll Wed Feb 26 02:25:02 2014
@@ -150,6 +150,53 @@ entry:
 ; CHECK-NEXT: ret
 }
 
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) nounwind
+
+; Same as test4 with a different sized address  space pointer source.
+define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) {
+; CHECK-LABEL: @test4_as1(
+entry:
+	%a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+  store <4 x i32> %x, <4 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+  store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+  %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i32 1, i1 false)
+; CHECK-NOT: memcpy
+
+  %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+  %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+  %z.tmp1 = getelementptr inbounds <4 x i32> addrspace(1)* %z, i16 0, i16 2
+  %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i32 1, i1 false)
+  %tmp1 = load i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+  %tmp2 = load i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+  %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK:      %[[load:.*]] = load <4 x i32> addrspace(1)* %z
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32> addrspace(1)* %z, i64 0, i64 2
+; CHECK-NEXT: %[[element_load:.*]] = load i32 addrspace(1)* %[[gep]]
+; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
 define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
 ; CHECK-LABEL: @test5(
 ; The same as the above, but with reversed source and destination for the





More information about the llvm-commits mailing list