[PATCH] Check address space when optimizing gep+cast

Fri Mar 28 15:53:59 PDT 2014

fixed a typo


On Fri, Mar 28, 2014 at 3:46 PM, Jingyue Wu <jingyue at google.com> wrote:

>
>
> On Fri, Mar 28, 2014 at 3:05 PM, Eli Bendersky <eliben at google.com> wrote:
>
>>
>>
>>
>> On Fri, Mar 28, 2014 at 2:25 PM, Jingyue Wu <jingyue at google.com> wrote:
>>
>>> Hi Matt,
>>>
>>> That makes sense, and here's a more constructive patch.
>>>
>>>
>> +          if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
>> +            return Res;
>> +          // Insert Res, and create an addrspacecast.
>> +          return new AddrSpaceCastInst(Builder->Insert(Res),
>> GEP.getType());
>>
>> Can you provide a bit more details in the comment? The surrounding code
>> has examples like:
>>
>> // -> GEP ... stuff
>>
>> Maybe something like this to clarify the sequence it turns to.
>>
>
> Done
>
>
>>
>>
>> + at array = internal addrspace(3) global [256 x float] zeroinitializer,
>> align 4
>> +
>> +define float* @keep_necessary_addrspacecast(i64 %i) {
>> +entry:
>> +; CHECK-LABEL: @keep_necessary_addrspacecast
>> +; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
>> +  %0 = getelementptr [256 x float]* addrspacecast ([256 x float]
>> addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
>> +  ret float* %0
>> +}
>>
>>
>> Please create test cases that cover both paths in the patch (array /
>> not-array).
>>
>
> Done
>
>
>>
>> Eli
>>
>>
>>>  Jingyue
>>>
>>>
>>> On Fri, Mar 28, 2014 at 12:07 PM, Matt Arsenault <
>>> Matthew.Arsenault at amd.com> wrote:
>>>
>>>>  On 03/28/2014 12:01 PM, Jingyue Wu wrote:
>>>>
>>>>  Fixing PR19270. This issue is blocking a waiting patch of mine that
>>>> implements the optimization we discussed in
>>>> http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-March/071440.html.
>>>>
>>>>  visitGetElementPtr in InstructionCombining.cpp gets rid of unnecessary
>>>> pointer casts in gep (cast X). However, this optimization may change the
>>>> address space of the result pointer type, and cause type mismatch.
>>>>
>>>>  e.g.,
>>>> getelementptr [256 x float]* addrspacecast ([256 x float] addrspace(3)*
>>>> @array to [256 x float]*), i64 0, i64 %i
>>>> returns a float*, but the optimized instruction
>>>> getelementptr [256 x float] addrspace(3)* @array, i64 0, i64 %i
>>>>  returns a float addrspace(3)*
>>>>
>>>> The attached patch disables this optimization when the address space of
>>>> the source is different from that of the destination.
>>>>
>>>>  Jingyue
>>>>
>>>>
>>>> If we're doing what I suggested and trying to do operations in the
>>>> original address space, then instead of disabling this, why don't you make
>>>> this instead just move the addrspacecast to after the GEP? Do the GEP in
>>>> addrspace(3), and then addrspacecast that.
>>>>
>>>> -Matt
>>>>
>>>
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140328/e8e06555/attachment.html>
-------------- next part --------------

diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0cab81b..b81df85 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1241,7 +1241,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           GetElementPtrInst *Res =
             GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
           Res->setIsInBounds(GEP.isInBounds());
-          return Res;
+          if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
+            return Res;
+          // Insert Res, and create an addrspacecast.
+          // e.g.,
+          // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
+          // ->
+          // %0 = GEP i8 addrspace(1)* X, ...
+          // addrspacecast i8 addrspace(1)* %0 to i8*
+          return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType());
         }
 
         if (ArrayType *XATy =
@@ -1253,8 +1261,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
             // to an array of the same type as the destination pointer
             // array.  Because the array type is never stepped over (there
             // is a leading zero) we can fold the cast into this GEP.
-            GEP.setOperand(0, StrippedPtr);
-            return &GEP;
+            if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
+              GEP.setOperand(0, StrippedPtr);
+              return &GEP;
+            }
+            // Cannot replace the base pointer directly because StrippedPtr's
+            // address space is different. Instead, create a new GEP followed by
+            // an addrspacecast.
+            // e.g.,
+            // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
+            //   i32 0, ...
+            // ->
+            // %0 = GEP [10 x i8] addrspace(1)* X, ...
+            // addrspacecast i8 addrspace(1)* %0 to i8*
+            SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
+            Value *NewGEP = GEP.isInBounds() ?
+              Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+              Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+            return new AddrSpaceCastInst(NewGEP, GEP.getType());
           }
         }
       }
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
index 24c355d..29511a3 100644
--- a/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-pc-win32"
@@ -17,3 +17,18 @@ ST:
   ret void
 }
 
+ at array = internal addrspace(3) global [256 x float] zeroinitializer, align 4
+ at scalar = internal addrspace(3) global float 0.000000e+00, align 4
+
+define void @keep_necessary_addrspacecast(i64 %i, float** %out0, float** %out1) {
+entry:
+; CHECK-LABEL: @keep_necessary_addrspacecast
+  %0 = getelementptr [256 x float]* addrspacecast ([256 x float] addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
+; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
+  %1 = getelementptr [0 x float]* addrspacecast (float addrspace(3)* @scalar to [0 x float]*), i64 0, i64 %i
+; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
+  store float* %0, float** %out0, align 4
+  store float* %1, float** %out1, align 4
+  ret void
+}
+