[llvm] r274853 - InstCombine rule to fold truncs whose value is available
Anna Thomas via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 8 08:18:56 PDT 2016
Author: annat
Date: Fri Jul 8 10:18:56 2016
New Revision: 274853
URL: http://llvm.org/viewvc/llvm-project?rev=274853&view=rev
Log:
InstCombine rule to fold truncs whose value is available
We can fold a trunc whose operand feeds from a load if the truncated value
is already available through a prior load or store.
This change is based on http://reviews.llvm.org/D21246, which folded the
trunc but missed the bitcast or ptrtoint/inttoptr required in the RAUW
call when the load type didn't match the prior load/store type.
Differential Revision: http://reviews.llvm.org/D21791
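As an informal illustration (not part of the commit itself), the fold rewrites
IR along the lines of the two sketches below; the function and value names are
made up here, and the before/after behaviour follows the test cases added to
trunc.ll in this change.

; The low byte of the widened load is already known from the earlier store,
; so the trunc folds to the stored value and the dead wide load is cleaned up.
define i8 @fold_from_store(i8* %a) {
  store i8 0, i8* %a, align 2
  %bca = bitcast i8* %a to i16*
  %wide = load i16, i16* %bca, align 2
  %low = trunc i16 %wide to i8
  ret i8 %low            ; after instcombine: ret i8 0
}

; When the available value has a different type, the replacement goes through
; a bit-or-pointer cast (the cast D21246 missed) rather than a plain RAUW.
define i32 @fold_with_cast(float* %b) {
  %fload = load float, float* %b
  %bca = bitcast float* %b to i64*
  %iload = load i64, i64* %bca
  %low32 = trunc i64 %iload to i32
  ; after the fold, %low32 is replaced with:
  ;   %low32.cast = bitcast float %fload to i32
  ; (later cleanups may simplify further)
  ret i32 %low32
}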
Modified:
llvm/trunk/include/llvm/Analysis/Loads.h
llvm/trunk/lib/Analysis/Loads.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
llvm/trunk/test/Transforms/InstCombine/trunc.ll
Modified: llvm/trunk/include/llvm/Analysis/Loads.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/Loads.h?rev=274853&r1=274852&r2=274853&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/Loads.h (original)
+++ llvm/trunk/include/llvm/Analysis/Loads.h Fri Jul 8 10:18:56 2016
@@ -58,6 +58,44 @@ bool isSafeToLoadUnconditionally(Value *
/// to scan in the block, used by FindAvailableLoadedValue().
extern cl::opt<unsigned> DefMaxInstsToScan;
+/// \brief Scan backwards to see if we have the value of type \p AccessTy
+/// at the memory address \p Ptr locally available within a
+/// small number of instructions. If the value is available, return it.
+///
+/// You can use this function to scan across multiple blocks: after you call
+/// this function, if ScanFrom points at the beginning of the block, it's safe
+/// to continue scanning the predecessors.
+/// Note that we assume \p Ptr is accessed through a non-volatile but
+/// potentially atomic load. Any other constraints should be verified by the
+/// caller.
+///
+/// \param Ptr The memory location whose contents we are retrieving
+/// \param AccessTy The type (and size) of the contents we need from \p Ptr
+/// \param IsAtomicMemOp Specifies the atomicity of the memory operation that
+/// accesses \p Ptr. We verify that atomicity constraints are satisfied when
+/// forwarding a value from another memory operation that makes the contents
+/// of \p Ptr available.
+/// \param ScanBB The basic block to scan. FIXME: This is redundant.
+/// \param [in,out] ScanFrom The location to start scanning from. When this
+/// function returns, it points at the last instruction scanned.
+/// \param MaxInstsToScan The maximum number of instructions to scan. If this
+/// is zero, the whole block will be scanned.
+/// \param AA Optional pointer to alias analysis, to make the scan more
+/// precise.
+/// \param [out] AATags The aliasing metadata for the operation which produced
+/// the value. FIXME: This is basically useless.
+///
+/// \param [out] IsLoadCSE Whether the returned value is a load from the same
+/// location in memory, as opposed to the value operand of a store.
+///
+/// \returns The found value, or nullptr if no value is found.
+Value *FindAvailableLoadedValue(Value *Ptr, Type *AccessTy, bool IsAtomicMemOp,
+ BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA = nullptr,
+ AAMDNodes *AATags = nullptr,
+ bool *IsLoadCSE = nullptr);
+
/// \brief Scan backwards to see if we have the value of the given load
/// available locally within a small number of instructions.
///
Modified: llvm/trunk/lib/Analysis/Loads.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/Loads.cpp?rev=274853&r1=274852&r2=274853&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/Loads.cpp (original)
+++ llvm/trunk/lib/Analysis/Loads.cpp Fri Jul 8 10:18:56 2016
@@ -297,27 +297,17 @@ llvm::DefMaxInstsToScan("available-load-
"to scan backward from a given instruction, when searching for "
"available loaded value"));
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
+ bool IsAtomicMemOp, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
AliasAnalysis *AA, AAMDNodes *AATags,
bool *IsLoadCSE) {
+
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
-
- Value *Ptr = Load->getPointerOperand();
- Type *AccessTy = Load->getType();
-
- // We can never remove a volatile load
- if (Load->isVolatile())
- return nullptr;
-
- // Anything stronger than unordered is currently unimplemented.
- if (!Load->isUnordered())
- return nullptr;
-
const DataLayout &DL = ScanBB->getModule()->getDataLayout();
-
// Try to get the store size for the type.
uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
@@ -348,7 +338,7 @@ Value *llvm::FindAvailableLoadedValue(Lo
// We can value forward from an atomic to a non-atomic, but not the
// other way around.
- if (LI->isAtomic() < Load->isAtomic())
+ if (LI->isAtomic() < IsAtomicMemOp)
return nullptr;
if (AATags)
@@ -369,7 +359,7 @@ Value *llvm::FindAvailableLoadedValue(Lo
// We can value forward from an atomic to a non-atomic, but not the
// other way around.
- if (SI->isAtomic() < Load->isAtomic())
+ if (SI->isAtomic() < IsAtomicMemOp)
return nullptr;
if (AATags)
@@ -413,3 +403,24 @@ Value *llvm::FindAvailableLoadedValue(Lo
// block.
return nullptr;
}
+
+
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA, AAMDNodes *AATags,
+ bool *IsLoadCSE) {
+
+ // We can never remove a volatile load
+ if (Load->isVolatile())
+ return nullptr;
+
+ // Anything stronger than unordered is currently unimplemented.
+ if (!Load->isUnordered())
+ return nullptr;
+
+ // Return the full value of the load if available.
+ return FindAvailableLoadedValue(Load->getPointerOperand(), Load->getType(),
+ Load->isAtomic(), ScanBB, ScanFrom,
+ MaxInstsToScan, AA, AATags, IsLoadCSE);
+}
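For context (this note is not part of the patch): the isAtomic() comparisons
above encode that a value may be forwarded from an atomic access to a
non-atomic one, but not the other way around. A hypothetical IR sketch of what
that means for the new trunc fold (the function names are illustrative):

; Forwarding allowed: the earlier access is atomic, and the wide load feeding
; the trunc is non-atomic, so the trunc can be folded to %p.
define i8 @forward_atomic_to_nonatomic(i8* %a) {
  %p = load atomic i8, i8* %a unordered, align 2
  %bca = bitcast i8* %a to i16*
  %wide = load i16, i16* %bca, align 2
  %low = trunc i16 %wide to i8
  ret i8 %low            ; can become: ret i8 %p
}

; Forwarding refused: the earlier access is non-atomic but the wide load is
; atomic, so LI->isAtomic() < IsAtomicMemOp holds and the scan gives up.
define i8 @no_forward_into_atomic(i8* %a) {
  %p = load i8, i8* %a, align 2
  %bca = bitcast i8* %a to i16*
  %wide = load atomic i16, i16* %bca unordered, align 2
  %low = trunc i16 %wide to i8
  ret i8 %low            ; left as-is
}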
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=274853&r1=274852&r2=274853&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp Fri Jul 8 10:18:56 2016
@@ -13,9 +13,10 @@
#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -575,6 +576,27 @@ Instruction *InstCombiner::visitTrunc(Tr
if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
return I;
+ // When the trunc operand is a widened load, see if we can get the value from
+ // a previous store/load.
+ if (auto *LI = dyn_cast<LoadInst>(Src)) {
+ BasicBlock::iterator BBI(*LI);
+
+ // Scan a few instructions up from LI; if we find a partial load/store of
+ // type DestTy that feeds into LI, we can replace all uses of the trunc with
+ // that load/store value.
+ // This replacement can be done only for non-volatile loads whose ordering is
+ // at most unordered. If the load is atomic, its only use should be the trunc
+ // instruction; we don't want other users of LI to see a value that is out of
+ // sync with the value we're folding the trunc to (in case of a race).
+ if (LI->isUnordered() && (!LI->isAtomic() || LI->hasOneUse()))
+ if (Value *AvailableVal = FindAvailableLoadedValue(
+ LI->getPointerOperand(), DestTy, LI->isAtomic(), LI->getParent(),
+ BBI, DefMaxInstsToScan))
+ return replaceInstUsesWith(
+ CI, Builder->CreateBitOrPointerCast(AvailableVal, CI.getType(),
+ CI.getName() + ".cast"));
+ }
return nullptr;
}
Modified: llvm/trunk/test/Transforms/InstCombine/trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/trunc.ll?rev=274853&r1=274852&r2=274853&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/trunc.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/trunc.ll Fri Jul 8 10:18:56 2016
@@ -181,3 +181,133 @@ bb1:
bb2:
unreachable
}
+
+
+; The trunc can be replaced with the value available from the store;
+; the load feeding into the trunc is left as-is.
+declare void @consume(i8) readonly
+define i1 @trunc_load_store(i8* align 2 %a) {
+ store i8 0, i8 *%a, align 2
+ %bca = bitcast i8* %a to i16*
+ %wide.load = load i16, i16* %bca, align 2
+ %lowhalf.1 = trunc i16 %wide.load to i8
+ call void @consume(i8 %lowhalf.1)
+ %cmp.2 = icmp ult i16 %wide.load, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load_store
+; CHECK: %wide.load = load i16, i16* %bca, align 2
+; CHECK-NOT: trunc
+; CHECK: call void @consume(i8 0)
+}
+
+
+; The trunc can be replaced with the value of the narrow load;
+; both loads are left as-is, since they have uses.
+define i1 @trunc_load_load(i8* align 2 %a) {
+ %pload = load i8, i8* %a, align 2
+ %bca = bitcast i8* %a to i16*
+ %wide.load = load i16, i16* %bca, align 2
+ %lowhalf = trunc i16 %wide.load to i8
+ call void @consume(i8 %lowhalf)
+ call void @consume(i8 %pload)
+ %cmp.2 = icmp ult i16 %wide.load, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load_load
+; CHECK-NEXT: %pload = load i8, i8* %a, align 2
+; CHECK-NEXT: %bca = bitcast i8* %a to i16*
+; CHECK-NEXT: %wide.load = load i16, i16* %bca, align 2
+; CHECK-NEXT: call void @consume(i8 %pload)
+; CHECK-NEXT: call void @consume(i8 %pload)
+; CHECK-NEXT: %cmp.2 = icmp ult i16 %wide.load, 256
+}
+
+; Store and load access the same memory location, with the address generated through a GEP.
+; The trunc can be removed by using the store value.
+define void @trunc_with_gep_memaccess(i16* align 2 %p) {
+ %t0 = getelementptr i16, i16* %p, i32 1
+ store i16 2, i16* %t0
+ %t1 = getelementptr i16, i16* %p, i32 1
+ %x = load i16, i16* %t1
+ %lowhalf = trunc i16 %x to i8
+ call void @consume(i8 %lowhalf)
+ ret void
+; CHECK-LABEL: @trunc_with_gep_memaccess
+; CHECK-NOT: trunc
+; CHECK: call void @consume(i8 2)
+}
+
+; The trunc should not be replaced, since the atomic load %wide.load has more than one use;
+; in case of a race, the uses of %wide.load could otherwise see different values.
+define i1 @trunc_atomic_loads(i8* align 2 %a) {
+ %pload = load atomic i8, i8* %a unordered, align 2
+ %bca = bitcast i8* %a to i16*
+ %wide.load = load atomic i16, i16* %bca unordered, align 2
+ %lowhalf = trunc i16 %wide.load to i8
+ call void @consume(i8 %lowhalf)
+ call void @consume(i8 %pload)
+ %cmp.2 = icmp ult i16 %wide.load, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_atomic_loads
+; CHECK: trunc
+}
+
+; The trunc can be replaced since the atomic load has a single use.
+; The atomic load is also removed once that use goes away.
+define void @trunc_atomic_single_load(i8* align 2 %a) {
+ %pload = load atomic i8, i8* %a unordered, align 2
+ %bca = bitcast i8* %a to i16*
+ %wide.load = load atomic i16, i16* %bca unordered, align 2
+ %lowhalf = trunc i16 %wide.load to i8
+ call void @consume(i8 %lowhalf)
+ call void @consume(i8 %pload)
+ ret void
+; CHECK-LABEL: @trunc_atomic_single_load
+; CHECK-NOT: trunc
+; CHECK-NOT: %wide.load = load atomic i16, i16* %bca unordered, align 2
+}
+
+
+; The trunc cannot be replaced since the load's atomic ordering is stronger than unordered.
+define void @trunc_atomic_monotonic(i8* align 2 %a) {
+ %pload = load atomic i8, i8* %a monotonic, align 2
+ %bca = bitcast i8* %a to i16*
+ %wide.load = load atomic i16, i16* %bca monotonic, align 2
+ %lowhalf = trunc i16 %wide.load to i8
+ call void @consume(i8 %lowhalf)
+ call void @consume(i8 %pload)
+ ret void
+; CHECK-LABEL: @trunc_atomic_monotonic
+; CHECK: %wide.load = load atomic i16, i16* %bca monotonic, align 2
+; CHECK: trunc
+}
+
+; The trunc cannot be replaced since the store size (i16) does not match the trunc result size (i8).
+; FIXME: we could extract the i8 content of the trunc from the i16 store value.
+define i1 @trunc_different_size_load(i16 * align 2 %a) {
+ store i16 0, i16 *%a, align 2
+ %bca = bitcast i16* %a to i32*
+ %wide.load = load i32, i32* %bca, align 2
+ %lowhalf = trunc i32 %wide.load to i8
+ call void @consume(i8 %lowhalf)
+ %cmp.2 = icmp ult i32 %wide.load, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_different_size_load
+; CHECK: %lowhalf = trunc i32 %wide.load to i8
+}
+
+declare void @consume_f(float) readonly
+; A bitcast is required since the trunc result type and %fload have different types,
+; so the trunc is replaced with a bitcast.
+define i1 @trunc_avoid_bitcast(float* %b) {
+ %fload = load float, float* %b
+ %bca = bitcast float* %b to i64*
+ %iload = load i64, i64* %bca
+ %low32 = trunc i64 %iload to i32
+ call void @consume_f(float %fload)
+ %cmp.2 = icmp ult i32 %low32, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_avoid_bitcast
+; CHECK-NOT: %low32 = trunc i64 %iload to i32
+; CHECK: %low32.cast = bitcast float %fload to i32
+; CHECK: %cmp.2 = icmp ult i32 %low32.cast, 256
+}