[PATCH] D33298: [SROA] Fix adjusted pointer type and APInt size when load/store have different address space

Yaxun Liu via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed May 17 14:06:51 PDT 2017


yaxunl created this revision.
Herald added subscribers: tpr, wdng.

Currently there is a bug in SROA::presplitLoadsAndStores which causes an assertion
failure in GEPOperator::accumulateConstantOffset.

The function does not consider the situation where the pointer operand of a load or
store is in a non-zero address space, whose pointer size may differ from the pointer
size of address space 0.
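
As an illustration of the size mismatch (this sketch is not part of the patch and
uses a hypothetical data layout string, not AMDGPU's actual one): DataLayout reports
a per-address-space pointer width, and GEPOperator::accumulateConstantOffset expects
the offset APInt to be exactly that wide, so an offset sized for address space 0 can
trip the assertion.

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    // Hypothetical layout: 64-bit pointers in address space 0,
    // 32-bit pointers in address space 3.
    DataLayout DL("p:64:64-p3:32:32");

    unsigned FlatBits  = DL.getPointerSizeInBits(0); // 64
    unsigned LocalBits = DL.getPointerSizeInBits(3); // 32

    // An offset built with DL.getPointerSizeInBits() (i.e. address space 0)
    // would be 64 bits wide; accumulating it against an addrspace(3) pointer
    // hits the bit-width assertion. The offset must use the width of the
    // pointer operand's own address space.
    APInt Offset(LocalBits, 0);
    outs() << "AS0: " << FlatBits << " bits, AS3: " << LocalBits
           << " bits, offset width: " << Offset.getBitWidth() << "\n";
    return 0;
  }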

In addition, when it calls getAdjustedPtr for the load instruction, it should pass a
pointer type derived from the pointer operand of the load instruction rather than that
of the store instruction; otherwise an invalid bitcast instruction is created whenever
the pointer operand of the load differs from that of the store.
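
For illustration only, a tiny hypothetical helper (partPtrAndOffset is not a real
SROA.cpp function) capturing the rule the patch applies separately on the load and
store sides:

  #include <cstdint>
  #include <utility>
  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  // Hypothetical helper, not in SROA.cpp: pointer type and constant offset to
  // use when splitting off a part of type PartTy at byte offset PartOffset for
  // an access in the given address space.
  static std::pair<PointerType *, APInt>
  partPtrAndOffset(const DataLayout &DL, Type *PartTy, unsigned AddrSpace,
                   uint64_t PartOffset) {
    // Re-pointer the part type into the access's own address space ...
    PointerType *PartPtrTy = PartTy->getPointerTo(AddrSpace);
    // ... and size the offset to that address space's pointer width.
    APInt Offset(DL.getPointerSizeInBits(AddrSpace), PartOffset);
    return {PartPtrTy, Offset};
  }

The load side would call this with LI->getPointerAddressSpace() and the store side
with SI->getPointerAddressSpace(), which is essentially what the changes below do
inline.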

This patch fixes an assertion failure hit when compiling the Blender Cycles kernels
for the AMDGPU backend.


https://reviews.llvm.org/D33298

Files:
  lib/Transforms/Scalar/SROA.cpp
  test/Transforms/SROA/address-spaces.ll


Index: test/Transforms/SROA/address-spaces.ll
===================================================================
--- test/Transforms/SROA/address-spaces.ll
+++ test/Transforms/SROA/address-spaces.ll
@@ -83,3 +83,31 @@
   store i32 addrspace(3)* @l, i32 addrspace(3)** %3, align 8
   ret void
 }
+
+; Test load from and store to non-zero address space.
+define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) {
+; CHECK-LABEL: @test_load_store_diff_addr_space
+; CHECK-NOT: alloca
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+  %a = alloca i64
+  %a.cast = bitcast i64* %a to [2 x float]*
+  %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0
+  %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1
+  %complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0
+  %p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)*
+  %v1 = load i64, i64 addrspace(1)* %p1
+  store i64 %v1, i64* %a
+  %f1 = load float, float* %a.gep1
+  %f2 = load float, float* %a.gep2
+  %sum = fadd float %f1, %f2
+  store float %sum, float* %a.gep1
+  store float %sum, float* %a.gep2
+  %v2 = load i64, i64* %a
+  %complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, i32 0, i32 0
+  %p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)*
+  store i64 %v2, i64 addrspace(1)* %p2
+  ret void
+}
Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -3627,9 +3627,12 @@
             PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
 
         StoreInst *PStore = IRB.CreateAlignedStore(
-            PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
-                                  APInt(DL.getPointerSizeInBits(), PartOffset),
-                                  PartPtrTy, StoreBasePtr->getName() + "."),
+            PLoad,
+            getAdjustedPtr(
+                IRB, DL, StoreBasePtr,
+                APInt(DL.getPointerSizeInBits(SI->getPointerAddressSpace()),
+                      PartOffset),
+                PartPtrTy, StoreBasePtr->getName() + "."),
             getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
         PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
         DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
@@ -3698,28 +3701,34 @@
     int Idx = 0, Size = Offsets.Splits.size();
     for (;;) {
       auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
-      auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
+      auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
+      auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
 
       // Either lookup a split load or create one.
       LoadInst *PLoad;
       if (SplitLoads) {
         PLoad = (*SplitLoads)[Idx];
       } else {
         IRB.SetInsertPoint(LI);
         PLoad = IRB.CreateAlignedLoad(
-            getAdjustedPtr(IRB, DL, LoadBasePtr,
-                           APInt(DL.getPointerSizeInBits(), PartOffset),
-                           PartPtrTy, LoadBasePtr->getName() + "."),
+            getAdjustedPtr(
+                IRB, DL, LoadBasePtr,
+                APInt(DL.getPointerSizeInBits(LI->getPointerAddressSpace()),
+                      PartOffset),
+                LoadPartPtrTy, LoadBasePtr->getName() + "."),
             getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
             LI->getName());
       }
 
       // And store this partition.
       IRB.SetInsertPoint(SI);
       StoreInst *PStore = IRB.CreateAlignedStore(
-          PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
-                                APInt(DL.getPointerSizeInBits(), PartOffset),
-                                PartPtrTy, StoreBasePtr->getName() + "."),
+          PLoad,
+          getAdjustedPtr(
+              IRB, DL, StoreBasePtr,
+              APInt(DL.getPointerSizeInBits(SI->getPointerAddressSpace()),
+                    PartOffset),
+              StorePartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
 
       // Now build a new slice for the alloca.



