[llvm] r238574 - [NVPTXFavorNonGenericAddrSpaces] recursively trace into GEP and BitCast

Jingyue Wu jingyue at google.com
Fri May 29 10:00:27 PDT 2015


Author: jingyue
Date: Fri May 29 12:00:27 2015
New Revision: 238574

URL: http://llvm.org/viewvc/llvm-project?rev=238574&view=rev
Log:
[NVPTXFavorNonGenericAddrSpaces] recursively trace into GEP and BitCast

Summary:
This patch allows NVPTXFavorNonGenericAddrSpaces to remove addrspacecast
from longer chains consisting of GEPs and BitCasts. For example, it can
now optimize

  %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
  %1 = gep [10 x float]* %0, i64 0, i64 %i
  %2 = bitcast float* %1 to i32*
  %3 = load i32* %2 ; emits ld.u32

to

  %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
  %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
  %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32

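For quick orientation, below is a minimal hypothetical sketch of the
recursion this patch implements. The authoritative code is
hoistAddrSpaceCastFrom in the diff that follows; the name
rootsAtAddrSpaceCast and the standalone framing are illustrative only.

  // Illustrative sketch only: detects whether V is rooted, through any mix
  // of GEPs and bitcasts, at an addrspacecast that could be hoisted outward.
  #include "llvm/IR/Operator.h"
  using namespace llvm;

  static bool rootsAtAddrSpaceCast(Value *V, int Depth = 0) {
    if (Operator *Op = dyn_cast<Operator>(V))
      if (Op->getOpcode() == Instruction::AddrSpaceCast)
        return true;                // found the cast to hoist
    if (Depth >= 20)                // bound the recursion, as the patch does
      return false;
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
      return rootsAtAddrSpaceCast(GEP->getPointerOperand(), Depth + 1);
    if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
      return rootsAtAddrSpaceCast(BC->getOperand(0), Depth + 1);
    return false;
  }

The real implementation additionally performs the rewrite as it unwinds,
via the hoistAddrSpaceCastFromGEP and hoistAddrSpaceCastFromBitCast helpers.
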
Test Plan: @ld_int_from_global_float in access-non-generic.ll

Reviewers: broune, eliben, jholewinski, meheff

Subscribers: jholewinski, llvm-commits

Differential Revision: http://reviews.llvm.org/D10074

Modified:
    llvm/trunk/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
    llvm/trunk/test/CodeGen/NVPTX/access-non-generic.ll

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp?rev=238574&r1=238573&r2=238574&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp Fri May 29 12:00:27 2015
@@ -10,34 +10,54 @@
 // When a load/store accesses the generic address space, checks whether the
 // address is casted from a non-generic address space. If so, remove this
 // addrspacecast because accessing non-generic address spaces is typically
-// faster. Besides seeking addrspacecasts, this optimization also traces into
-// the base pointer of a GEP.
+// faster. Besides removing addrspacecasts directly used by loads/stores, this
+// optimization also recursively traces into a GEP's pointer operand and a
+// bitcast's source to find more eliminable addrspacecasts.
 //
 // For instance, the code below loads a float from an array allocated in
 // addrspace(3).
 //
-// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
-// %1 = gep [10 x float]* %0, i64 0, i64 %i
-// %2 = load float* %1 ; emits ld.f32
+//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
+//   %1 = gep [10 x float]* %0, i64 0, i64 %i
+//   %2 = bitcast float* %1 to i32*
+//   %3 = load i32* %2 ; emits ld.u32
 //
-// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast
-// and the GEP to expose more optimization opportunities to function
+// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
+// and the bitcast to expose more optimization opportunities to function
-// optimizeMemoryInst. The intermediate code looks like:
+// optimizeMemoryInstruction. The intermediate code looks like:
 //
-// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %1 = addrspacecast float addrspace(3)* %0 to float*
-// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly
+//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+//   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
+//   %2 = addrspacecast i32 addrspace(3)* %1 to i32*
+//   %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
 //
 // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
 // generic pointers, and folds the load and the addrspacecast into a load from
 // the original address space. The final code looks like:
 //
-// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32
+//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+//   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
+//   %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32
 //
 // This pass may remove an addrspacecast in a different BB. Therefore, we
 // implement it as a FunctionPass.
 //
+// TODO:
+// The current implementation doesn't handle PHINodes. Eliminating
+// addrspacecasts used by PHINodes is trickier because PHINodes can introduce
+// loops in data flow. For example,
+//
+//     %generic.input = addrspacecast float addrspace(3)* %input to float*
+//   loop:
+//     %y = phi float* [ %generic.input, ... ], [ %y2, %loop ]
+//     %y2 = getelementptr %y, 1
+//     %v = load %y2
+//     br ..., label %loop, ...
+//
+// Marking %y2 as shared depends on marking %y as shared, but %y in turn
+// depends on %y2 through the data flow. We probably need an iterative
+// fixed-point algorithm to handle this case.
+//
 //===----------------------------------------------------------------------===//
 
 #include "NVPTX.h"
@@ -62,17 +82,31 @@ class NVPTXFavorNonGenericAddrSpaces : p
 public:
   static char ID;
   NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
-
   bool runOnFunction(Function &F) override;
 
+private:
   /// Optimizes load/store instructions. Idx is the index of the pointer operand
   /// (0 for load, and 1 for store). Returns true if it changes anything.
   bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
+  /// Recursively traces into a GEP's pointer operand or a bitcast's source to
+  /// find an eliminable addrspacecast, and hoists that addrspacecast to the
+  /// outermost level. For example, this function transforms
+  ///   bitcast(gep(gep(addrspacecast(X))))
+  /// to
+  ///   addrspacecast(bitcast(gep(gep(X)))).
+  ///
+  /// This reordering exposes to optimizeMemoryInstruction more
+  /// optimization opportunities on loads and stores.
+  ///
+  /// Returns true if this function successfully hoists an eliminable
+  /// addrspacecast or V is already such an addrspacecast.
-  /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X,
-  /// indices)".  This reordering exposes to optimizeMemoryInstruction more
-  /// optimization opportunities on loads and stores. Returns true if it changes
-  /// the program.
-  bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP);
+  bool hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
+  /// Helper function for GEPs. Transforms "gep (addrspacecast X), indices"
+  /// into "addrspacecast (gep X, indices)".
+  bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
+  /// Helper function for bitcasts.
+  bool hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
 };
 }
 
@@ -85,11 +119,12 @@ INITIALIZE_PASS(NVPTXFavorNonGenericAddr
                 "Remove unnecessary non-generic-to-generic addrspacecasts",
                 false, false)
 
-// Decides whether removing Cast is valid and beneficial. Cast can be an
-// instruction or a constant expression.
-static bool IsEliminableAddrSpaceCast(Operator *Cast) {
-  // Returns false if not even an addrspacecast.
-  if (Cast->getOpcode() != Instruction::AddrSpaceCast)
+// Decides whether V is an addrspacecast and whether shortcutting V in a
+// load/store is valid and beneficial.
+static bool isEliminableAddrSpaceCast(Value *V) {
+  // Returns false if V is not even an addrspacecast.
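+  // Note that Operator is the common view over both instructions and constant
+  // expressions, so this handles an addrspacecast in either form.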
+  Operator *Cast = dyn_cast<Operator>(V);
+  if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
     return false;
 
   Value *Src = Cast->getOperand(0);
@@ -108,64 +143,116 @@ static bool IsEliminableAddrSpaceCast(Op
           DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
 }
 
-bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
-    GEPOperator *GEP) {
-  Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand());
-  if (!Cast)
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP,
+                                                               int Depth) {
+  if (!hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1))
     return false;
 
-  if (!IsEliminableAddrSpaceCast(Cast))
-    return false;
+  // Since hoistAddrSpaceCastFrom succeeded, GEP's pointer operand is now an
+  // eliminable addrspacecast.
+  assert(isEliminableAddrSpaceCast(GEP->getPointerOperand()));
+  Operator *Cast = cast<Operator>(GEP->getPointerOperand());
 
   SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
   if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
-    // %1 = gep (addrspacecast X), indices
+    // GEP = gep (addrspacecast X), indices
     // =>
-    // %0 = gep X, indices
-    // %1 = addrspacecast %0
-    GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+    // NewGEP = gep X, indices
+    // NewASC = addrspacecast NewGEP
+    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
         GEP->getSourceElementType(), Cast->getOperand(0), Indices,
-        GEP->getName(), GEPI);
-    NewGEPI->setIsInBounds(GEP->isInBounds());
-    GEP->replaceAllUsesWith(
-        new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
+        "", GEPI);
+    NewGEP->setIsInBounds(GEP->isInBounds());
+    Value *NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
+    NewASC->takeName(GEP);
+    GEP->replaceAllUsesWith(NewASC);
   } else {
     // GEP is a constant expression.
-    Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
+    Constant *NewGEP = ConstantExpr::getGetElementPtr(
         GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
         Indices, GEP->isInBounds());
     GEP->replaceAllUsesWith(
-        ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
+        ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType()));
   }
 
   return true;
 }
 
-bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
-                                                               unsigned Idx) {
-  // If the pointer operand is a GEP, hoist the addrspacecast if any from the
-  // GEP to expose more optimization opportunites.
-  if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) {
-    hoistAddrSpaceCastFromGEP(GEP);
-  }
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
+    BitCastOperator *BC, int Depth) {
+  if (!hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1))
+    return false;
 
-  // load/store (addrspacecast X) => load/store X if shortcutting the
-  // addrspacecast is valid and can improve performance.
-  //
-  // e.g.,
-  // %1 = addrspacecast float addrspace(3)* %0 to float*
-  // %2 = load float* %1
-  // ->
-  // %2 = load float addrspace(3)* %0
-  //
-  // Note: the addrspacecast can also be a constant expression.
-  if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) {
-    if (IsEliminableAddrSpaceCast(Cast)) {
-      MI->setOperand(Idx, Cast->getOperand(0));
-      return true;
-    }
+  // Since hoistAddrSpaceCastFrom succeeded, BC's source operand is now an
+  // eliminable addrspacecast.
+  assert(isEliminableAddrSpaceCast(BC->getOperand(0)));
+  Operator *Cast = cast<Operator>(BC->getOperand(0));
+
+  // Cast  = addrspacecast Src
+  // BC    = bitcast Cast
+  //   =>
+  // Cast' = bitcast Src
+  // BC'   = addrspacecast Cast'
+  Value *Src = Cast->getOperand(0);
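+  // The new bitcast keeps BC's pointee type but takes Src's (non-generic)
+  // address space, which lets the addrspacecast move to the outermost level.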
+  Type *TypeOfNewCast =
+      PointerType::get(BC->getType()->getPointerElementType(),
+                       Src->getType()->getPointerAddressSpace());
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
+    Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
+    Value *NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
+    NewBC->takeName(BC);
+    BC->replaceAllUsesWith(NewBC);
+  } else {
+    // BC is a constant expression.
+    Constant *NewCast =
+        ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
+    Constant *NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
+    BC->replaceAllUsesWith(NewBC);
   }
+  return true;
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
+                                                            int Depth) {
+  // Returns true if V is already an eliminable addrspacecast.
+  if (isEliminableAddrSpaceCast(V))
+    return true;
+
+  // Limit the depth to prevent this recursive function from running too long.
+  const int MaxDepth = 20;
+  if (Depth >= MaxDepth)
+    return false;
 
+  // If V is a GEP or a bitcast, hoist the addrspacecast, if any, from its
+  // pointer operand or source. This enables optimizeMemoryInstruction to
+  // shortcut addrspacecasts that are not directly used by the load/store.
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+    return hoistAddrSpaceCastFromGEP(GEP, Depth);
+
+  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
+    return hoistAddrSpaceCastFromBitCast(BC, Depth);
+
+  return false;
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
+                                                               unsigned Idx) {
+  if (hoistAddrSpaceCastFrom(MI->getOperand(Idx))) {
+    // load/store (addrspacecast X) => load/store X if shortcutting the
+    // addrspacecast is valid and can improve performance.
+    //
+    // e.g.,
+    // %1 = addrspacecast float addrspace(3)* %0 to float*
+    // %2 = load float* %1
+    // ->
+    // %2 = load float addrspace(3)* %0
+    //
+    // Note: the addrspacecast can also be a constant expression.
+    assert(isEliminableAddrSpaceCast(MI->getOperand(Idx)));
+    Operator *ASC = cast<Operator>(MI->getOperand(Idx));
+    MI->setOperand(Idx, ASC->getOperand(0));
+    return true;
+  }
   return false;
 }
 

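An aside on the TODO in the header comment above: one plausible shape for
the iterative fixed-point algorithm it mentions is sketched below. This is
purely hypothetical and not part of the patch (findShareable and every
other name here is invented for illustration). It grows the set of pointers
derived from a non-generic root until the set stabilizes, which is what
PHI-induced cycles require.

  #include "llvm/ADT/DenseSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Operator.h"
  using namespace llvm;

  // Hypothetical: collects every value derived from Root through GEPs,
  // bitcasts, and PHIs. Iteration is needed because a PHI becomes derivable
  // only once all of its incoming values are.
  static DenseSet<Value *> findShareable(Value *Root) {
    DenseSet<Value *> Shareable;
    Shareable.insert(Root);
    bool Changed = true;
    while (Changed) {                        // iterate to a fixed point
      Changed = false;
      SmallVector<Value *, 16> Work(Shareable.begin(), Shareable.end());
      for (Value *V : Work) {
        for (User *U : V->users()) {
          if (Shareable.count(U))
            continue;
          bool Derived = false;
          if (GEPOperator *GEP = dyn_cast<GEPOperator>(U))
            Derived = Shareable.count(GEP->getPointerOperand()) > 0;
          else if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U))
            Derived = Shareable.count(BC->getOperand(0)) > 0;
          else if (PHINode *PN = dyn_cast<PHINode>(U)) {
            // Shareable only if every incoming value already is; later
            // iterations revisit the PHI as the set grows.
            Derived = true;
            for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
              if (!Shareable.count(PN->getIncomingValue(I)))
                Derived = false;
          }
          if (Derived) {
            Shareable.insert(U);
            Changed = true;
          }
        }
      }
    }
    return Shareable;
  }

The patch itself deliberately stops at GEPs and bitcasts; the sketch only
illustrates why a fixed point becomes necessary once cycles enter the data
flow.
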
Modified: llvm/trunk/test/CodeGen/NVPTX/access-non-generic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/access-non-generic.ll?rev=238574&r1=238573&r2=238574&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/access-non-generic.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/access-non-generic.ll Fri May 29 12:00:27 2015
@@ -85,6 +85,22 @@ define i32 @ld_int_from_float() {
   ret i32 %1
 }
 
+define i32 @ld_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) {
+; IR-LABEL: @ld_int_from_global_float(
+; PTX-LABEL: ld_int_from_global_float(
+  %1 = addrspacecast float addrspace(1)* %input to float*
+  %2 = getelementptr float, float* %1, i32 %i
+; IR-NEXT: getelementptr float, float addrspace(1)* %input, i32 %i
+  %3 = getelementptr float, float* %2, i32 %j
+; IR-NEXT: getelementptr float, float addrspace(1)* {{%[^,]+}}, i32 %j
+  %4 = bitcast float* %3 to i32*
+; IR-NEXT: bitcast float addrspace(1)* {{%[^ ]+}} to i32 addrspace(1)*
+  %5 = load i32, i32* %4
+; IR-NEXT: load i32, i32 addrspace(1)* {{%.+}}
+; PTX: ld.global
+  ret i32 %5
+}
+
 declare void @llvm.cuda.syncthreads() #3
 
 attributes #3 = { noduplicate nounwind }

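As a usage note, the new checks can be run in isolation with llvm-lit on
test/CodeGen/NVPTX/access-non-generic.ll from a built tree; the IR prefixes
are verified through opt and the PTX prefixes through llc, per the RUN
lines at the top of the test file (not shown in this diff).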
