[llvm] r229959 - [InstCombine] Remove unnecessary variable indexing into single-element arrays

Hal Finkel hfinkel at anl.gov
Thu Feb 19 19:05:53 PST 2015


Author: hfinkel
Date: Thu Feb 19 21:05:53 2015
New Revision: 229959

URL: http://llvm.org/viewvc/llvm-project?rev=229959&view=rev
Log:
[InstCombine] Remove unnecessary variable indexing into single-element arrays

This change addresses a deficiency pointed out in PR22629. To copy from the bug
report:

[from the bug report]

Consider this code:

int f(int x) {
  int a[] = {12};
  return a[x];
}

GCC knows to optimize this to

movl     $12, %eax
ret

The code generated by recent Clang at -O3 is:

movslq   %edi, %rax
movl     .L_ZZ1fiE1a(,%rax,4), %eax
retq

.L_ZZ1fiE1a:
  .long    12                      # 0xc

[end from the bug report]

This definitely seems worth fixing. I've also seen this kind of code before (as
the base case of generic vector wrapper templates with one element).

The general idea is to look at the GEP feeding a load or a store, which has
some variable as its first non-zero index, and determine if that index must be
zero (or else an out-of-bounds access would occur). We can do this for allocas
and globals with constant initializers where we know the maximum size of the
underlying object. When we find such a GEP, we create a new one for the memory
access with that first variable index replaced with a constant zero.

Even if we can't eliminate the memory access (and sometimes we can't), it is
still useful because it removes unnecessary indexing calculations.

Added:
    llvm/trunk/test/Transforms/InstCombine/mem-gep-zidx.ll
Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
    llvm/trunk/test/Transforms/InstCombine/load-cmp.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp?rev=229959&r1=229958&r2=229959&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp Thu Feb 19 21:05:53 2015
@@ -468,6 +468,181 @@ static Instruction *combineLoadToOperati
   return nullptr;
 }
 
+// If we can determine that all possible objects pointed to by the provided
+// pointer value are, not only dereferenceable, but also definitively less than
+// or equal to the provided maximum size, then return true. Otherwise, return
+// false (constant global values and allocas fall into this category).
+//
+// FIXME: This should probably live in ValueTracking (or similar).
+static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
+                                     const DataLayout *DL) {
+  SmallPtrSet<Value *, 4> Visited;
+  SmallVector<Value *, 4> Worklist(1, V);
+
+  do {
+    Value *P = Worklist.pop_back_val();
+    P = P->stripPointerCasts();
+
+    if (!Visited.insert(P).second)
+      continue;
+
+    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+      Worklist.push_back(SI->getTrueValue());
+      Worklist.push_back(SI->getFalseValue());
+      continue;
+    }
+
+    if (PHINode *PN = dyn_cast<PHINode>(P)) {
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        Worklist.push_back(PN->getIncomingValue(i));
+      continue;
+    }
+
+    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
+      if (GA->mayBeOverridden())
+        return false;
+      Worklist.push_back(GA->getAliasee());
+      continue;
+    }
+
+    // If we know how big this object is, and it is less than MaxSize, continue
+    // searching. Otherwise, return false.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
+      if (!AI->getAllocatedType()->isSized())
+        return false;
+
+      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
+      if (!CS)
+        return false;
+
+      uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType());
+      // Make sure that, even if the multiplication below would wrap as an
+      // uint64_t, we still do the right thing.
+      if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+        return false;
+      continue;
+    }
+
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
+        return false;
+
+      uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType());
+      if (InitSize > MaxSize)
+        return false;
+      continue;
+    }
+
+    return false;
+  } while (!Worklist.empty());
+
+  return true;
+}
+
+// If we're indexing into an object of a known size, and the outer index is
+// not a constant, but having any value but zero would lead to undefined
+// behavior, replace it with zero.
+//
+// For example, if we have:
+// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+// ...
+// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
+// ... = load i32* %arrayidx, align 4
+// Then we know that we can replace %x in the GEP with i64 0.
+//
+// FIXME: We could fold any GEP index to zero that would cause UB if it were
+// not zero. Currently, we only handle the first such index. Also, we could
+// also search through non-zero constant indices if we kept track of the
+// offsets those indices implied.
+static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
+                                     Instruction *MemI, unsigned &Idx) {
+  const DataLayout *DL = IC.getDataLayout();
+  if (GEPI->getNumOperands() < 2 || !DL)
+    return false;
+
+  // Find the first non-zero index of a GEP. If all indices are zero, return
+  // one past the last index.
+  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
+    unsigned I = 1;
+    for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
+      Value *V = GEPI->getOperand(I);
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+        if (CI->isZero())
+          continue;
+
+      break;
+    }
+
+    return I;
+  };
+
+  // Skip through initial 'zero' indices, and find the corresponding pointer
+  // type. See if the next index is not a constant.
+  Idx = FirstNZIdx(GEPI);
+  if (Idx == GEPI->getNumOperands())
+    return false;
+  if (isa<Constant>(GEPI->getOperand(Idx)))
+    return false;
+
+  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
+  Type *AllocTy =
+    GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
+  if (!AllocTy || !AllocTy->isSized())
+    return false;
+  uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy);
+
+  // If there are more indices after the one we might replace with a zero, make
+  // sure they're all non-negative. If any of them are negative, the overall
+  // address being computed might be before the base address determined by the
+  // first non-zero index.
+  auto IsAllNonNegative = [&]() {
+    for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
+      bool KnownNonNegative, KnownNegative;
+      IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
+                        KnownNegative, 0, MemI);
+      if (KnownNonNegative)
+        continue;
+      return false;
+    }
+
+    return true;
+  };
+
+  // FIXME: If the GEP is not inbounds, and there are extra indices after the
+  // one we'll replace, those could cause the address computation to wrap
+  // (rendering the IsAllNonNegative() check below insufficient). We can do
+  // better, ignoring zero indicies (and other indicies we can prove small
+  // enough not to wrap).
+  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
+    return false;
+
+  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
+  // also known to be dereferenceable.
+  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
+         IsAllNonNegative();
+}
+
+// If we're indexing into an object with a variable index for the memory
+// access, but the object has only one element, we can assume that the index
+// will always be zero. If we replace the GEP, return it.
+template <typename T>
+static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+                                          T &MemI) {
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+    unsigned Idx;
+    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+      Instruction *NewGEPI = GEPI->clone();
+      NewGEPI->setOperand(Idx,
+        ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+      NewGEPI->insertBefore(GEPI);
+      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+      return NewGEPI;
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
 
@@ -489,6 +664,12 @@ Instruction *InstCombiner::visitLoadInst
       LI.setAlignment(EffectiveLoadAlign);
   }
 
+  // Replace GEP indices if possible.
+  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
+      Worklist.Add(NewGEPI);
+      return &LI;
+  }
+
   // None of the following transforms are legal for volatile/atomic loads.
   // FIXME: Some of it is okay for atomic loads; needs refactoring.
   if (!LI.isSimple()) return nullptr;
@@ -663,6 +844,12 @@ Instruction *InstCombiner::visitStoreIns
       SI.setAlignment(EffectiveStoreAlign);
   }
 
+  // Replace GEP indices if possible.
+  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
+      Worklist.Add(NewGEPI);
+      return &SI;
+  }
+
   // Don't hack volatile/atomic stores.
   // FIXME: Some bits are legal for atomic stores; needs refactoring.
   if (!SI.isSimple()) return nullptr;

Modified: llvm/trunk/test/Transforms/InstCombine/load-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/load-cmp.ll?rev=229959&r1=229958&r2=229959&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/load-cmp.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/load-cmp.ll Thu Feb 19 21:05:53 2015
@@ -230,7 +230,7 @@ define i1 @test10_struct(i32 %x) {
 ; NODL: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
 
 ; P32-LABEL: @test10_struct(
-; P32: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 9
@@ -256,8 +256,7 @@ define i1 @test10_struct_i16(i16 %x){
 ; NODL: getelementptr inbounds %Foo* @GS, i16 %x, i32 0
 
 ; P32-LABEL: @test10_struct_i16(
-; P32: %1 = sext i16 %x to i32
-; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i16 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 0
@@ -271,8 +270,7 @@ define i1 @test10_struct_i64(i64 %x){
 ; NODL: getelementptr inbounds %Foo* @GS, i64 %x, i32 0
 
 ; P32-LABEL: @test10_struct_i64(
-; P32: %1 = trunc i64 %x to i32
-; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i64 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 0

Added: llvm/trunk/test/Transforms/InstCombine/mem-gep-zidx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/mem-gep-zidx.ll?rev=229959&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/mem-gep-zidx.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/mem-gep-zidx.ll Thu Feb 19 21:05:53 2015
@@ -0,0 +1,48 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+ at f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+ at f.b = private unnamed_addr constant [1 x i32] [i32 55], align 4
+
+define signext i32 @test1(i32 signext %x) #0 {
+entry:
+  %idxprom = sext i32 %x to i64
+  %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @test1
+; CHECK: ret i32 12
+}
+
+declare void @foo(i64* %p)
+define void @test2(i32 signext %x, i64 %v) #0 {
+entry:
+  %p = alloca i64
+  %idxprom = sext i32 %x to i64
+  %arrayidx = getelementptr inbounds i64* %p, i64 %idxprom
+  store i64 %v, i64* %arrayidx
+  call void @foo(i64* %p)
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: %p = alloca i64
+; CHECK: store i64 %v, i64* %p
+; CHECK: ret void
+}
+
+define signext i32 @test3(i32 signext %x, i1 %y) #0 {
+entry:
+  %idxprom = sext i32 %x to i64
+  %p = select i1 %y, [1 x i32]* @f.a, [1 x i32]* @f.b
+  %arrayidx = getelementptr inbounds [1 x i32]* %p, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @test3
+; CHECK: getelementptr inbounds [1 x i32]* %p, i64 0, i64 0
+}
+
+attributes #0 = { nounwind readnone }
+





More information about the llvm-commits mailing list