[PATCH] Check address space when optimizing gep+cast

Fri Mar 28 15:46:53 PDT 2014

On Fri, Mar 28, 2014 at 3:05 PM, Eli Bendersky <eliben at google.com> wrote:

>
>
>
> On Fri, Mar 28, 2014 at 2:25 PM, Jingyue Wu <jingyue at google.com> wrote:
>
>> Hi Matt,
>>
>> That makes sense, and here's a more constructive patch.
>>
>>
> +          if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
> +            return Res;
> +          // Insert Res, and create an addrspacecast.
> +          return new AddrSpaceCastInst(Builder->Insert(Res),
> GEP.getType());
>
> Can you provide a bit more details in the comment? The surrounding code
> has examples like:
>
> // -> GEP ... stuff
>
> Maybe something like this to clarify the sequence it turns to.
>

Done


>
>
> + at array = internal addrspace(3) global [256 x float] zeroinitializer,
> align 4
> +
> +define float* @keep_necessary_addrspacecast(i64 %i) {
> +entry:
> +; CHECK-LABEL: @keep_necessary_addrspacecast
> +; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
> +  %0 = getelementptr [256 x float]* addrspacecast ([256 x float]
> addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
> +  ret float* %0
> +}
>
>
> Please create test cases that cover both paths in the patch (array /
> not-array).
>

Done


>
> Eli
>
>
>> Jingyue
>>
>>
>> On Fri, Mar 28, 2014 at 12:07 PM, Matt Arsenault <
>> Matthew.Arsenault at amd.com> wrote:
>>
>>>  On 03/28/2014 12:01 PM, Jingyue Wu wrote:
>>>
>>>  Fixing PR19270. This issue is blocking a waiting patch of mine that
>>> implements the optimization we discussed in
>>> http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-March/071440.html.
>>>
>>>  visitGetElementPtr in InstructionCombining.cpp gets rid of unnecessary
>>> pointer casts in gep (cast X). However, this optimization may change the
>>> address space of the result pointer type, and cause type mismatch.
>>>
>>>  e.g.,
>>> getelementptr [256 x float]* addrspacecast ([256 x float] addrspace(3)*
>>> @array to [256 x float]*), i64 0, i64 %i
>>> returns a float*, but the optimized instruction
>>> getelementptr [256 x float] addrspace(3)* @array, i64 0, i64 %i
>>>  returns a float addrspace(3)*
>>>
>>> The attached patch disables this optimization when the address space of
>>> the source is different from that of the destination.
>>>
>>>  Jingyue
>>>
>>>
>>> If we're doing what I suggested and trying to do operations in the
>>> original address space, then instead of disabling this, why don't you make
>>> this instead just move the addrspacecast to after the GEP? Do the GEP in
>>> addrspace(3), and then addrspacecast that.
>>>
>>> -Matt
>>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140328/ea97bd7d/attachment.html>
-------------- next part --------------

diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0cab81b..1bc5d46 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1241,7 +1241,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           GetElementPtrInst *Res =
             GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
           Res->setIsInBounds(GEP.isInBounds());
-          return Res;
+          if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
+            return Res;
+          // Insert Res, and create an addrspacecast.
+          // e.g.,
+          // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
+          // ->
+          // %0 = GEP i8 addrspace(1)* X, ...
+          // addrspacecst i8 addrspace(1)* %0 to i8*
+          return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType());
         }
 
         if (ArrayType *XATy =
@@ -1253,8 +1261,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
             // to an array of the same type as the destination pointer
             // array.  Because the array type is never stepped over (there
             // is a leading zero) we can fold the cast into this GEP.
-            GEP.setOperand(0, StrippedPtr);
-            return &GEP;
+            if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
+              GEP.setOperand(0, StrippedPtr);
+              return &GEP;
+            }
+            // Cannot replace the base pointer directly because StrippedPtr's
+            // address space is different. Instead, create a new GEP followed by
+            // an addrspacecast.
+            // e.g.,
+            // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
+            //   i32 0, ...
+            // ->
+            // %0 = GEP [10 x i8] addrspace(1)* X, ...
+            // addrspacecst i8 addrspace(1)* %0 to i8*
+            SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
+            Value *NewGEP = GEP.isInBounds() ?
+              Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+              Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+            return new AddrSpaceCastInst(NewGEP, GEP.getType());
           }
         }
       }
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
index 24c355d..29511a3 100644
--- a/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-pc-win32"
@@ -17,3 +17,18 @@ ST:
   ret void
 }
 
+ at array = internal addrspace(3) global [256 x float] zeroinitializer, align 4
+ at scalar = internal addrspace(3) global float 0.000000e+00, align 4
+
+define void @keep_necessary_addrspacecast(i64 %i, float** %out0, float** %out1) {
+entry:
+; CHECK-LABEL: @keep_necessary_addrspacecast
+  %0 = getelementptr [256 x float]* addrspacecast ([256 x float] addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
+; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
+  %1 = getelementptr [0 x float]* addrspacecast (float addrspace(3)* @scalar to [0 x float]*), i64 0, i64 %i
+; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
+  store float* %0, float** %out0, align 4
+  store float* %1, float** %out1, align 4
+  ret void
+}
+