[llvm] r345327 - [SystemZ] Improve getMemoryOpCost() to find foldable loads that are converted.
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 25 15:28:25 PDT 2018
Author: jonpa
Date: Thu Oct 25 15:28:25 2018
New Revision: 345327
URL: http://llvm.org/viewvc/llvm-project?rev=345327&view=rev
Log:
[SystemZ] Improve getMemoryOpCost() to find foldable loads that are converted.
The SystemZ backend can do arithmetic with a memory operand by loading and then
extending one of the operands. Similarly, a load + truncate can be folded into
an operand.
This patch improves the SystemZ TTI cost function to recognize this.
Review: Ulrich Weigand
https://reviews.llvm.org/D52692
Modified:
llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h
llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll
Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp?rev=345327&r1=345326&r2=345327&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp Thu Oct 25 15:28:25 2018
@@ -849,54 +849,102 @@ getVectorInstrCost(unsigned Opcode, Type
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
+// Check if a load may be folded as a memory operand in its user.
+bool SystemZTTIImpl::
+isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) {
+ if (!Ld->hasOneUse())
+ return false;
+ FoldedValue = Ld;
+ const Instruction *UserI = cast<Instruction>(*Ld->user_begin());
+ unsigned LoadedBits = getScalarSizeInBits(Ld->getType());
+ unsigned TruncBits = 0;
+ unsigned SExtBits = 0;
+ unsigned ZExtBits = 0;
+ if (UserI->hasOneUse()) {
+ unsigned UserBits = UserI->getType()->getScalarSizeInBits();
+ if (isa<TruncInst>(UserI))
+ TruncBits = UserBits;
+ else if (isa<SExtInst>(UserI))
+ SExtBits = UserBits;
+ else if (isa<ZExtInst>(UserI))
+ ZExtBits = UserBits;
+ }
+ if (TruncBits || SExtBits || ZExtBits) {
+ FoldedValue = UserI;
+ UserI = cast<Instruction>(*UserI->user_begin());
+ // Load (single use) -> trunc/extend (single use) -> UserI
+ }
+ switch (UserI->getOpcode()) {
+ case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64
+ case Instruction::Sub:
+ if (LoadedBits == 32 && ZExtBits == 64)
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64
+ if (LoadedBits == 16 &&
+ (SExtBits == 32 ||
+ (SExtBits == 64 && ST->hasMiscellaneousExtensions2())))
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::SDiv:// SE: 32->64
+ if (LoadedBits == 32 && SExtBits == 64)
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ // This also makes sense for float operations, but disabled for now due
+ // to regressions.
+ // case Instruction::FCmp:
+ // case Instruction::FAdd:
+ // case Instruction::FSub:
+ // case Instruction::FMul:
+ // case Instruction::FDiv:
+
+ // All possible extensions of memory checked above.
+ if (SExtBits || ZExtBits)
+ return false;
+
+ unsigned LoadOrTruncBits = (TruncBits ? TruncBits : LoadedBits);
+ return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
+ break;
+ }
+ return false;
+}
+
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
- if (!Src->isVectorTy() && Opcode == Instruction::Load &&
- I != nullptr && I->hasOneUse()) {
- const Instruction *UserI = cast<Instruction>(*I->user_begin());
- unsigned Bits = getScalarSizeInBits(Src);
- bool FoldsLoad = false;
- switch (UserI->getOpcode()) {
- case Instruction::ICmp:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // This also makes sense for float operations, but disabled for now due
- // to regressions.
- // case Instruction::FCmp:
- // case Instruction::FAdd:
- // case Instruction::FSub:
- // case Instruction::FMul:
- // case Instruction::FDiv:
- FoldsLoad = (Bits == 32 || Bits == 64);
- break;
- }
-
- if (FoldsLoad) {
- assert (UserI->getNumOperands() == 2 &&
- "Expected to only handle binops.");
-
- // UserI can't fold two loads, so in that case return 0 cost only
- // half of the time.
- for (unsigned i = 0; i < 2; ++i) {
- if (UserI->getOperand(i) == I)
- continue;
- if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
- if (LI->hasOneUse())
- return i == 0;
- }
+ if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
+ // Store the load or its truncated or extended value in FoldedValue.
+ const Instruction *FoldedValue = nullptr;
+ if (isFoldableLoad(cast<LoadInst>(I), FoldedValue)) {
+ const Instruction *UserI = cast<Instruction>(*FoldedValue->user_begin());
+ assert (UserI->getNumOperands() == 2 && "Expected a binop.");
+
+ // UserI can't fold two loads, so in that case return 0 cost only
+ // half of the time.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (UserI->getOperand(i) == FoldedValue)
+ continue;
+
+ if (Instruction *OtherOp = dyn_cast<Instruction>(UserI->getOperand(i))){
+ LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp);
+ if (!OtherLoad &&
+ (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) ||
+ isa<ZExtInst>(OtherOp)))
+ OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0));
+ if (OtherLoad && isFoldableLoad(OtherLoad, FoldedValue/*dummy*/))
+ return i == 0; // Both operands foldable.
}
-
- return 0;
}
+
+ return 0; // Only I is foldable in user.
+ }
}
unsigned NumOps =
Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h?rev=345327&r1=345326&r2=345327&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h Thu Oct 25 15:28:25 2018
@@ -85,6 +85,7 @@ public:
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
Modified: llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll?rev=345327&r1=345326&r2=345327&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll Thu Oct 25 15:28:25 2018
@@ -1,4 +1,7 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,Z13
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z14 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,Z14
;
; Test that loads into operations that can fold one memory operand get zero
; cost. In the case that both operands are loaded, one load should get a cost
@@ -19,6 +22,35 @@ define void @add() {
%li64_1 = load i64, i64* undef
add i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr = trunc i64 %li64_2 to i32
+ add i32 %tr, undef
+
+ ; Sign-extended loads
+ %li16_0 = load i16, i16* undef
+ %sext_0 = sext i16 %li16_0 to i32
+ add i32 %sext_0, undef
+
+ %li16_1 = load i16, i16* undef
+ %sext_1 = sext i16 %li16_1 to i64
+ add i64 %sext_1, undef
+
+ %li32_2 = load i32, i32* undef
+ %sext_2 = sext i32 %li32_2 to i64
+ add i64 %sext_2, undef
+
+ ; Zero-extended loads
+ %li32_3 = load i32, i32* undef
+ %zext_0 = zext i32 %li32_3 to i64
+ add i64 %zext_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li16_3 = load i16, i16* undef
+ %sext_3 = sext i16 %li16_3 to i32
+ %sext_4 = sext i16 %li16_3 to i32
+ add i32 %sext_3, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
@@ -31,6 +63,26 @@ define void @add() {
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = add i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = add i32 %tr, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = add i32 %sext_0, undef
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = add i64 %sext_1, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = add i64 %sext_2, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i32 %li32_3 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = add i64 %zext_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = add i32 %sext_3, undef
}
define void @sub() {
@@ -48,6 +100,35 @@ define void @sub() {
%li64_1 = load i64, i64* undef
sub i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr = trunc i64 %li64_2 to i32
+ sub i32 %tr, undef
+
+ ; Sign-extended loads
+ %li16_0 = load i16, i16* undef
+ %sext_0 = sext i16 %li16_0 to i32
+ sub i32 %sext_0, undef
+
+ %li16_1 = load i16, i16* undef
+ %sext_1 = sext i16 %li16_1 to i64
+ sub i64 %sext_1, undef
+
+ %li32_2 = load i32, i32* undef
+ %sext_2 = sext i32 %li32_2 to i64
+ sub i64 %sext_2, undef
+
+ ; Zero-extended loads
+ %li32_3 = load i32, i32* undef
+ %zext_0 = zext i32 %li32_3 to i64
+ sub i64 %zext_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li16_3 = load i16, i16* undef
+ %sext_3 = sext i16 %li16_3 to i32
+ %sext_4 = sext i16 %li16_3 to i32
+ sub i32 %sext_3, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
@@ -60,6 +141,26 @@ define void @sub() {
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = sub i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = sub i32 %tr, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = sub i32 %sext_0, undef
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = sub i64 %sext_1, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = sub i64 %sext_2, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i32 %li32_3 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = sub i64 %zext_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = sub i32 %sext_3, undef
}
define void @mul() {
@@ -77,6 +178,35 @@ define void @mul() {
%li64_1 = load i64, i64* undef
mul i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr = trunc i64 %li64_2 to i32
+ mul i32 %tr, undef
+
+ ; Sign-extended loads
+ %li16_0 = load i16, i16* undef
+ %sext_0 = sext i16 %li16_0 to i32
+ mul i32 %sext_0, undef
+
+ %li16_1 = load i16, i16* undef
+ %sext_1 = sext i16 %li16_1 to i64
+ mul i64 %sext_1, undef
+
+ %li32_2 = load i32, i32* undef
+ %sext_2 = sext i32 %li32_2 to i64
+ mul i64 %sext_2, undef
+
+ ; Zero-extended loads are *not* folded
+ %li16_2 = load i16, i16* undef
+ %zext_0 = zext i16 %li16_2 to i32
+ mul i32 %zext_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li16_3 = load i16, i16* undef
+ %sext_3 = sext i16 %li16_3 to i32
+ %sext_4 = sext i16 %li16_3 to i32
+ mul i32 %sext_3, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = mul i32 %li32, undef
@@ -88,6 +218,26 @@ define void @mul() {
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = mul i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = mul i32 %tr, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = mul i32 %sext_0, undef
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = mul i64 %sext_1, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = mul i64 %sext_2, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_2 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i16 %li16_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = mul i32 %zext_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = mul i32 %sext_3, undef
}
define void @sdiv(i32 %arg32, i64 %arg64) {
@@ -105,6 +255,22 @@ define void @sdiv(i32 %arg32, i64 %arg64
%li64_1 = load i64, i64* undef
sdiv i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr = trunc i64 %li64_2 to i32
+ sdiv i32 %tr, undef
+
+ ; Sign-extended loads
+ %li32_2 = load i32, i32* undef
+ %sext_0 = sext i32 %li32_2 to i64
+ sdiv i64 %sext_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li32_3 = load i32, i32* undef
+ %sext_1 = sext i32 %li32_3 to i64
+ %sext_2 = sext i32 %li32_3 to i64
+ sdiv i64 %sext_1, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
; CHECK: Cost Model: Found an estimated cost of 21 for instruction: %1 = sdiv i32 %li32, %arg32
@@ -116,6 +282,16 @@ define void @sdiv(i32 %arg32, i64 %arg64
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %4 = sdiv i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %5 = sdiv i32 %tr, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i32 %li32_2 to i64
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %6 = sdiv i64 %sext_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i32 %li32_3 to i64
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_3 to i64
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %7 = sdiv i64 %sext_1, undef
}
define void @udiv(i32 %arg32, i64 %arg64) {
@@ -133,6 +309,16 @@ define void @udiv(i32 %arg32, i64 %arg64
%li64_1 = load i64, i64* undef
udiv i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr_0 = trunc i64 %li64_2 to i32
+ udiv i32 %tr_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li64_3 = load i64, i64* undef
+ %tr_1 = trunc i64 %li64_3 to i32
+ udiv i64 %li64_3, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
; CHECK: Cost Model: Found an estimated cost of 21 for instruction: %1 = udiv i32 %li32, %arg32
@@ -144,6 +330,12 @@ define void @udiv(i32 %arg32, i64 %arg64
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 21 for instruction: %4 = udiv i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %5 = udiv i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %6 = udiv i64 %li64_3, undef
}
define void @and() {
@@ -161,6 +353,16 @@ define void @and() {
%li64_1 = load i64, i64* undef
and i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr_0 = trunc i64 %li64_2 to i32
+ and i32 %tr_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li64_3 = load i64, i64* undef
+ %tr_1 = trunc i64 %li64_3 to i32
+ and i64 %li64_3, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = and i32 %li32, undef
@@ -172,6 +374,12 @@ define void @and() {
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = and i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = and i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = and i64 %li64_3, undef
}
define void @or() {
@@ -189,6 +397,16 @@ define void @or() {
%li64_1 = load i64, i64* undef
or i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr_0 = trunc i64 %li64_2 to i32
+ or i32 %tr_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li64_3 = load i64, i64* undef
+ %tr_1 = trunc i64 %li64_3 to i32
+ or i64 %li64_3, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = or i32 %li32, undef
@@ -200,6 +418,12 @@ define void @or() {
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = or i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = or i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = or i64 %li64_3, undef
}
define void @xor() {
@@ -217,6 +441,16 @@ define void @xor() {
%li64_1 = load i64, i64* undef
xor i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr_0 = trunc i64 %li64_2 to i32
+ xor i32 %tr_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li64_3 = load i64, i64* undef
+ %tr_1 = trunc i64 %li64_3 to i32
+ xor i64 %li64_3, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = xor i32 %li32, undef
@@ -228,6 +462,12 @@ define void @xor() {
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = xor i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = xor i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = xor i64 %li64_3, undef
}
define void @icmp() {
@@ -245,6 +485,16 @@ define void @icmp() {
%li64_1 = load i64, i64* undef
icmp eq i64 %li64_0, %li64_1
+ ; Truncated load
+ %li64_2 = load i64, i64* undef
+ %tr_0 = trunc i64 %li64_2 to i32
+ icmp eq i32 %tr_0, undef
+
+ ; Loads with multiple uses are *not* folded
+ %li64_3 = load i64, i64* undef
+ %tr_1 = trunc i64 %li64_3 to i32
+ icmp eq i64 %li64_3, undef
+
ret void;
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = icmp eq i32 %li32, undef
@@ -256,4 +506,10 @@ define void @icmp() {
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = icmp eq i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = icmp eq i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = icmp eq i64 %li64_3, undef
}
More information about the llvm-commits
mailing list