[llvm] r301711 - InferAddressSpaces: Search constant expressions for addrspacecasts

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 28 15:52:42 PDT 2017


Author: arsenm
Date: Fri Apr 28 17:52:41 2017
New Revision: 301711

URL: http://llvm.org/viewvc/llvm-project?rev=301711&view=rev
Log:
InferAddressSpaces: Search constant expressions for addrspacecasts

These casts are pretty common when using local memory, and the 64-bit
generic addressing they imply is much more expensive to compute than
direct local addressing.
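As a minimal before/after sketch of the pattern this handles (taken from
the AMDGPU test updated below), the addrspacecast back to the generic
(flat) space can be folded inside a constant getelementptr expression,
where the previous operand walk never looked:

  ; Before: the cast of @lds to the flat space hides inside a constant GEP.
  %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
  %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
  store double 1.0, double addrspace(3)* %asc0, align 8

  ; After: the nested cast is found and the whole GEP is rewritten to use
  ; the local (addrspace(3)) pointer directly.
  %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
  store double 1.000000e+00, double addrspace(3)* %gep0, align 8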

Modified:
    llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
    llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
    llvm/trunk/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll

Modified: llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp?rev=301711&r1=301710&r2=301711&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp Fri Apr 28 17:52:41 2017
@@ -138,7 +138,7 @@ private:
 
   // Tries to infer the specific address space of each address expression in
   // Postorder.
-  void inferAddressSpaces(const std::vector<Value *> &Postorder,
+  void inferAddressSpaces(ArrayRef<WeakVH> Postorder,
                           ValueToAddrSpaceMapTy *InferredAddrSpace) const;
 
   bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
@@ -147,7 +147,7 @@ private:
   // address spaces if InferredAddrSpace says so. Postorder is the postorder of
   // all flat expressions in the use-def graph of function F.
   bool
-  rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
+  rewriteWithNewAddressSpaces(ArrayRef<WeakVH> Postorder,
                               const ValueToAddrSpaceMapTy &InferredAddrSpace,
                               Function *F) const;
 
@@ -162,7 +162,7 @@ private:
     std::vector<std::pair<Value *, bool>> &PostorderStack,
     DenseSet<Value *> &Visited) const;
 
-  std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+  std::vector<WeakVH> collectFlatAddressExpressions(Function &F) const;
 
   Value *cloneValueWithNewAddressSpace(
     Value *V, unsigned NewAddrSpace,
@@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddr
     Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
     DenseSet<Value *> &Visited) const {
   assert(V->getType()->isPointerTy());
+
+  // Generic addressing expressions may be hidden in nested constant
+  // expressions.
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    // TODO: Look in non-address parts, like icmp operands.
+    if (isAddressExpression(*CE) && Visited.insert(CE).second)
+      PostorderStack.push_back(std::make_pair(CE, false));
+
+    return;
+  }
+
   if (isAddressExpression(*V) &&
       V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
-    if (Visited.insert(V).second)
+    if (Visited.insert(V).second) {
       PostorderStack.push_back(std::make_pair(V, false));
+
+      Operator *Op = cast<Operator>(V);
+      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
+        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
+          if (isAddressExpression(*CE) && Visited.insert(CE).second)
+            PostorderStack.emplace_back(CE, false);
+        }
+      }
+    }
   }
 }
 
 // Returns all flat address expressions in function F. The elements are
 // ordered in postorder.
-std::vector<Value *>
+std::vector<WeakVH>
 InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
   // This function implements a non-recursive postorder traversal of a partial
   // use-def graph of function F.
@@ -332,18 +352,19 @@ InferAddressSpaces::collectFlatAddressEx
     }
   }
 
-  std::vector<Value *> Postorder; // The resultant postorder.
+  std::vector<WeakVH> Postorder; // The resultant postorder.
   while (!PostorderStack.empty()) {
+    Value *TopVal = PostorderStack.back().first;
     // If the operands of the expression on the top are already explored,
     // adds that expression to the resultant postorder.
     if (PostorderStack.back().second) {
-      Postorder.push_back(PostorderStack.back().first);
+      Postorder.push_back(TopVal);
       PostorderStack.pop_back();
       continue;
     }
     // Otherwise, adds its operands to the stack and explores them.
     PostorderStack.back().second = true;
-    for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
+    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
       appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                    Visited);
     }
@@ -562,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(F
     return false;
 
   // Collects all flat address expressions in postorder.
-  std::vector<Value *> Postorder = collectFlatAddressExpressions(F);
+  std::vector<WeakVH> Postorder = collectFlatAddressExpressions(F);
 
   // Runs a data-flow analysis to refine the address spaces of every expression
   // in Postorder.
@@ -574,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(F
   return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
 }
 
+// Constants need to be tracked through RAUW to handle cases with nested
+// constant expressions, so wrap values in WeakVH.
 void InferAddressSpaces::inferAddressSpaces(
-    const std::vector<Value *> &Postorder,
+    ArrayRef<WeakVH> Postorder,
     ValueToAddrSpaceMapTy *InferredAddrSpace) const {
   SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
   // Initially, all expressions are in the uninitialized address space.
@@ -787,7 +810,7 @@ static Value::use_iterator skipToNextUse
 }
 
 bool InferAddressSpaces::rewriteWithNewAddressSpaces(
-  const std::vector<Value *> &Postorder,
+  ArrayRef<WeakVH> Postorder,
   const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
   // For each address expression to be modified, creates a clone of it with its
   // pointer operands converted to the new address space. Since the pointer
@@ -818,7 +841,9 @@ bool InferAddressSpaces::rewriteWithNewA
   SmallVector<Instruction *, 16> DeadInstructions;
 
   // Replaces the uses of the old address expressions with the new ones.
-  for (Value *V : Postorder) {
+  for (const WeakVH &WVH : Postorder) {
+    assert(WVH && "value was unexpectedly deleted");
+    Value *V = WVH;
     Value *NewV = ValueWithNewAddrSpace.lookup(V);
     if (NewV == nullptr)
       continue;
@@ -826,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewA
     DEBUG(dbgs() << "Replacing the uses of " << *V
                  << "\n  with\n  " << *NewV << '\n');
 
+    if (Constant *C = dyn_cast<Constant>(V)) {
+      Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+                                                         C->getType());
+      if (C != Replace) {
+        DEBUG(dbgs() << "Inserting replacement const cast: "
+              << Replace << ": " << *Replace << '\n');
+        C->replaceAllUsesWith(Replace);
+        V = Replace;
+      }
+    }
+
     Value::use_iterator I, E, Next;
     for (I = V->use_begin(), E = V->use_end(); I != E; ) {
       Use &U = *I;

Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll?rev=301711&r1=301710&r2=301711&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll Fri Apr 28 17:52:41 2017
@@ -15,9 +15,8 @@ define void @simplified_constexpr_gep_ad
   ret void
 }
 
-; FIXME: Should be able to eliminate inner constantexpr addrspacecast.
 ; CHECK-LABEL: @constexpr_gep_addrspacecast(
-; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* addrspacecast (double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384) to double addrspace(3)*), i64 %idx0
+; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
 ; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
 define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
   %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
@@ -54,3 +53,21 @@ define amdgpu_kernel void @vector_gep(<4
   store i32 99, i32 addrspace(4)* %p3
   ret void
 }
+
+; CHECK-LABEL: @repeated_constexpr_gep_addrspacecast(
+; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
+; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx1
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
+; CHECK-NEXT: ret void
+define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
+  %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
+  %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+  store double 1.0, double addrspace(3)* %asc0, align 8
+
+  %gep1 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx1
+  %asc1 = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+  store double 1.0, double addrspace(3)* %asc1, align 8
+
+  ret void
+}

Modified: llvm/trunk/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll?rev=301711&r1=301710&r2=301711&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll Fri Apr 28 17:52:41 2017
@@ -10,7 +10,7 @@ target datalayout = "e-i64:64-v16:16-v32
 ; CHECK: %tmp = load float*, float* addrspace(3)* getelementptr inbounds (%struct.bar, %struct.bar addrspace(3)* @var1, i64 0, i32 1), align 8
 ; CHECK: %tmp1 = load float, float* %tmp, align 4
 ; CHECK: store float %conv1, float* %tmp, align 4
-; CHECK: store i32 32, i32 addrspace(3)* addrspacecast (i32* bitcast (float** getelementptr (%struct.bar, %struct.bar* addrspacecast (%struct.bar addrspace(3)* @var1 to %struct.bar*), i64 0, i32 1) to i32*) to i32 addrspace(3)*), align 4
+; CHECK: store i32 32, i32 addrspace(3)* bitcast (float* addrspace(3)* getelementptr inbounds (%struct.bar, %struct.bar addrspace(3)* @var1, i64 0, i32 1) to i32 addrspace(3)*), align 4
 define void @bug31948(float %a, float* nocapture readnone %x, float* nocapture readnone %y) local_unnamed_addr #0 {
 entry:
   %tmp = load float*, float** getelementptr (%struct.bar, %struct.bar* addrspacecast (%struct.bar addrspace(3)* @var1 to %struct.bar*), i64 0, i32 1), align 8
