[llvm] r345327 - [SystemZ] Improve getMemoryOpCost() to find foldable loads that are converted.

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 25 15:28:25 PDT 2018


Author: jonpa
Date: Thu Oct 25 15:28:25 2018
New Revision: 345327

URL: http://llvm.org/viewvc/llvm-project?rev=345327&view=rev
Log:
[SystemZ] Improve getMemoryOpCost() to find foldable loads that are converted.

The SystemZ backend can perform arithmetic directly on a memory operand, which
may involve loading and then sign- or zero-extending that operand. Similarly, a
load followed by a truncate can be folded into the operand of its user.

This patch improves the SystemZ TTI cost function to recognize this.

Review: Ulrich Weigand
https://reviews.llvm.org/D52692

Modified:
    llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h
    llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll

Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp?rev=345327&r1=345326&r2=345327&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp Thu Oct 25 15:28:25 2018
@@ -849,54 +849,102 @@ getVectorInstrCost(unsigned Opcode, Type
   return BaseT::getVectorInstrCost(Opcode, Val, Index);
 }
 
+// Check if a load may be folded as a memory operand in its user.
+bool SystemZTTIImpl::
+isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) {
+  if (!Ld->hasOneUse())
+    return false;
+  FoldedValue = Ld;
+  const Instruction *UserI = cast<Instruction>(*Ld->user_begin());
+  unsigned LoadedBits = getScalarSizeInBits(Ld->getType());
+  unsigned TruncBits = 0;
+  unsigned SExtBits = 0;
+  unsigned ZExtBits = 0;
+  if (UserI->hasOneUse()) {
+    unsigned UserBits = UserI->getType()->getScalarSizeInBits();
+    if (isa<TruncInst>(UserI))
+      TruncBits = UserBits;
+    else if (isa<SExtInst>(UserI))
+      SExtBits = UserBits;
+    else if (isa<ZExtInst>(UserI))
+      ZExtBits = UserBits;
+  }
+  if (TruncBits || SExtBits || ZExtBits) {
+    FoldedValue = UserI;
+    UserI = cast<Instruction>(*UserI->user_begin());
+    // Load (single use) -> trunc/extend (single use) -> UserI
+  }
+  switch (UserI->getOpcode()) {
+  case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64
+  case Instruction::Sub:
+    if (LoadedBits == 32 && ZExtBits == 64)
+      return true;
+    LLVM_FALLTHROUGH;
+  case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64
+    if (LoadedBits == 16 &&
+        (SExtBits == 32 ||
+         (SExtBits == 64 && ST->hasMiscellaneousExtensions2())))
+      return true;
+    LLVM_FALLTHROUGH;
+  case Instruction::SDiv:// SE: 32->64
+    if (LoadedBits == 32 && SExtBits == 64)
+      return true;
+    LLVM_FALLTHROUGH;
+  case Instruction::UDiv:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::ICmp:
+    // This also makes sense for float operations, but disabled for now due
+    // to regressions.
+    // case Instruction::FCmp:
+    // case Instruction::FAdd:
+    // case Instruction::FSub:
+    // case Instruction::FMul:
+    // case Instruction::FDiv:
+
+    // All possible extensions of memory checked above.
+    if (SExtBits || ZExtBits)
+      return false;
+
+    unsigned LoadOrTruncBits = (TruncBits ? TruncBits : LoadedBits);
+    return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
+    break;
+  }
+  return false;
+}
+
 int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     unsigned Alignment, unsigned AddressSpace,
                                     const Instruction *I) {
   assert(!Src->isVoidTy() && "Invalid type");
 
-  if (!Src->isVectorTy() && Opcode == Instruction::Load &&
-      I != nullptr && I->hasOneUse()) {
-      const Instruction *UserI = cast<Instruction>(*I->user_begin());
-      unsigned Bits = getScalarSizeInBits(Src);
-      bool FoldsLoad = false;
-      switch (UserI->getOpcode()) {
-      case Instruction::ICmp:
-      case Instruction::Add:
-      case Instruction::Sub:
-      case Instruction::Mul:
-      case Instruction::SDiv:
-      case Instruction::UDiv:
-      case Instruction::And:
-      case Instruction::Or:
-      case Instruction::Xor:
-      // This also makes sense for float operations, but disabled for now due
-      // to regressions.
-      // case Instruction::FCmp:
-      // case Instruction::FAdd:
-      // case Instruction::FSub:
-      // case Instruction::FMul:
-      // case Instruction::FDiv:
-        FoldsLoad = (Bits == 32 || Bits == 64);
-        break;
-      }
-
-      if (FoldsLoad) {
-        assert (UserI->getNumOperands() == 2 &&
-                "Expected to only handle binops.");
-
-        // UserI can't fold two loads, so in that case return 0 cost only
-        // half of the time.
-        for (unsigned i = 0; i < 2; ++i) {
-          if (UserI->getOperand(i) == I)
-            continue;
-          if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
-            if (LI->hasOneUse())
-              return i == 0;
-          }
+  if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
+    // Store the load or its truncated or extended value in FoldedValue.
+    const Instruction *FoldedValue = nullptr;
+    if (isFoldableLoad(cast<LoadInst>(I), FoldedValue)) {
+      const Instruction *UserI = cast<Instruction>(*FoldedValue->user_begin());
+      assert (UserI->getNumOperands() == 2 && "Expected a binop.");
+
+      // UserI can't fold two loads, so in that case return 0 cost only
+      // half of the time.
+      for (unsigned i = 0; i < 2; ++i) {
+        if (UserI->getOperand(i) == FoldedValue)
+          continue;
+
+        if (Instruction *OtherOp = dyn_cast<Instruction>(UserI->getOperand(i))){
+          LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp);
+          if (!OtherLoad &&
+              (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) ||
+               isa<ZExtInst>(OtherOp)))
+            OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0));
+          if (OtherLoad && isFoldableLoad(OtherLoad, FoldedValue/*dummy*/))
+            return i == 0; // Both operands foldable.
         }
-
-        return 0;
       }
+
+      return 0; // Only I is foldable in user.
+    }
   }
 
   unsigned NumOps =

Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h?rev=345327&r1=345326&r2=345327&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.h Thu Oct 25 15:28:25 2018
@@ -85,6 +85,7 @@ public:
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+  bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
 

Modified: llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll?rev=345327&r1=345326&r2=345327&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll Thu Oct 25 15:28:25 2018
@@ -1,4 +1,7 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
+; RUN:  | FileCheck %s -check-prefixes=CHECK,Z13
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z14 \
+; RUN:  | FileCheck %s -check-prefixes=CHECK,Z14
 ;
 ; Test that loads into operations that can fold one memory operand get zero
 ; cost. In the case that both operands are loaded, one load should get a cost
@@ -19,6 +22,35 @@ define void @add() {
   %li64_1 = load i64, i64* undef
   add i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr = trunc i64 %li64_2 to i32
+  add i32 %tr, undef
+
+  ; Sign-extended loads
+  %li16_0 = load i16, i16* undef
+  %sext_0 = sext i16 %li16_0 to i32
+  add i32 %sext_0, undef
+
+  %li16_1 = load i16, i16* undef
+  %sext_1 = sext i16 %li16_1 to i64
+  add i64 %sext_1, undef
+
+  %li32_2 = load i32, i32* undef
+  %sext_2 = sext i32 %li32_2 to i64
+  add i64 %sext_2, undef
+
+  ; Zero-extended loads
+  %li32_3 = load i32, i32* undef
+  %zext_0 = zext i32 %li32_3 to i64
+  add i64 %zext_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li16_3 = load i16, i16* undef
+  %sext_3 = sext i16 %li16_3 to i32
+  %sext_4 = sext i16 %li16_3 to i32
+  add i32 %sext_3, undef
+
   ret void;
 
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
@@ -31,6 +63,26 @@ define void @add() {
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = add i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %5 = add i32 %tr, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_0 = sext i16 %li16_0 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %6 = add i32 %sext_0, undef
+; Z13:   Cost Model: Found an estimated cost of 1 for instruction:   %li16_1 = load i16, i16* undef
+; Z14:   Cost Model: Found an estimated cost of 0 for instruction:   %li16_1 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_1 = sext i16 %li16_1 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %7 = add i64 %sext_1, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_2 = sext i32 %li32_2 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %8 = add i64 %sext_2, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %zext_0 = zext i32 %li32_3 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %9 = add i64 %zext_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_3 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_4 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %10 = add i32 %sext_3, undef
 }
 
 define void @sub() {
@@ -48,6 +100,35 @@ define void @sub() {
   %li64_1 = load i64, i64* undef
   sub i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr = trunc i64 %li64_2 to i32
+  sub i32 %tr, undef
+
+  ; Sign-extended loads
+  %li16_0 = load i16, i16* undef
+  %sext_0 = sext i16 %li16_0 to i32
+  sub i32 %sext_0, undef
+
+  %li16_1 = load i16, i16* undef
+  %sext_1 = sext i16 %li16_1 to i64
+  sub i64 %sext_1, undef
+
+  %li32_2 = load i32, i32* undef
+  %sext_2 = sext i32 %li32_2 to i64
+  sub i64 %sext_2, undef
+
+  ; Zero-extended loads
+  %li32_3 = load i32, i32* undef
+  %zext_0 = zext i32 %li32_3 to i64
+  sub i64 %zext_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li16_3 = load i16, i16* undef
+  %sext_3 = sext i16 %li16_3 to i32
+  %sext_4 = sext i16 %li16_3 to i32
+  sub i32 %sext_3, undef
+
   ret void;
 
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
@@ -60,6 +141,26 @@ define void @sub() {
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = sub i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %5 = sub i32 %tr, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_0 = sext i16 %li16_0 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %6 = sub i32 %sext_0, undef
+; Z13:   Cost Model: Found an estimated cost of 1 for instruction:   %li16_1 = load i16, i16* undef
+; Z14:   Cost Model: Found an estimated cost of 0 for instruction:   %li16_1 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_1 = sext i16 %li16_1 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %7 = sub i64 %sext_1, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_2 = sext i32 %li32_2 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %8 = sub i64 %sext_2, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %zext_0 = zext i32 %li32_3 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %9 = sub i64 %zext_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_3 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_4 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %10 = sub i32 %sext_3, undef
 }
 
 define void @mul() {
@@ -77,6 +178,35 @@ define void @mul() {
   %li64_1 = load i64, i64* undef
   mul i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr = trunc i64 %li64_2 to i32
+  mul i32 %tr, undef
+
+  ; Sign-extended loads
+  %li16_0 = load i16, i16* undef
+  %sext_0 = sext i16 %li16_0 to i32
+  mul i32 %sext_0, undef
+
+  %li16_1 = load i16, i16* undef
+  %sext_1 = sext i16 %li16_1 to i64
+  mul i64 %sext_1, undef
+
+  %li32_2 = load i32, i32* undef
+  %sext_2 = sext i32 %li32_2 to i64
+  mul i64 %sext_2, undef
+
+  ; Zero-extended loads are *not* folded
+  %li16_2 = load i16, i16* undef
+  %zext_0 = zext i16 %li16_2 to i32
+  mul i32 %zext_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li16_3 = load i16, i16* undef
+  %sext_3 = sext i16 %li16_3 to i32
+  %sext_4 = sext i16 %li16_3 to i32
+  mul i32 %sext_3, undef
+
   ret void;
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = mul i32 %li32, undef
@@ -88,6 +218,26 @@ define void @mul() {
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = mul i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %5 = mul i32 %tr, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_0 = sext i16 %li16_0 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %6 = mul i32 %sext_0, undef
+; Z13:   Cost Model: Found an estimated cost of 1 for instruction:   %li16_1 = load i16, i16* undef
+; Z14:   Cost Model: Found an estimated cost of 0 for instruction:   %li16_1 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_1 = sext i16 %li16_1 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %7 = mul i64 %sext_1, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_2 = sext i32 %li32_2 to i64
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %8 = mul i64 %sext_2, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li16_2 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %zext_0 = zext i16 %li16_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %9 = mul i32 %zext_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_3 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_4 = sext i16 %li16_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %10 = mul i32 %sext_3, undef
 }
 
 define void @sdiv(i32 %arg32, i64 %arg64) {
@@ -105,6 +255,22 @@ define void @sdiv(i32 %arg32, i64 %arg64
   %li64_1 = load i64, i64* undef
   sdiv i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr = trunc i64 %li64_2 to i32
+  sdiv i32 %tr, undef
+
+  ; Sign-extended loads
+  %li32_2 = load i32, i32* undef
+  %sext_0 = sext i32 %li32_2 to i64
+  sdiv i64 %sext_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li32_3 = load i32, i32* undef
+  %sext_1 = sext i32 %li32_3 to i64
+  %sext_2 = sext i32 %li32_3 to i64
+  sdiv i64 %sext_1, undef
+
   ret void;
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
 ; CHECK: Cost Model: Found an estimated cost of 21 for instruction:  %1 = sdiv i32 %li32, %arg32
@@ -116,6 +282,16 @@ define void @sdiv(i32 %arg32, i64 %arg64
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 20 for instruction:  %4 = sdiv i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %5 = sdiv i32 %tr, undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_0 = sext i32 %li32_2 to i64
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %6 = sdiv i64 %sext_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_1 = sext i32 %li32_3 to i64
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %sext_2 = sext i32 %li32_3 to i64
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %7 = sdiv i64 %sext_1, undef
 }
 
 define void @udiv(i32 %arg32, i64 %arg64) {
@@ -133,6 +309,16 @@ define void @udiv(i32 %arg32, i64 %arg64
   %li64_1 = load i64, i64* undef
   udiv i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr_0 = trunc i64 %li64_2 to i32
+  udiv i32 %tr_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li64_3 = load i64, i64* undef
+  %tr_1 = trunc i64 %li64_3 to i32
+  udiv i64 %li64_3, undef
+
   ret void;
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
 ; CHECK: Cost Model: Found an estimated cost of 21 for instruction:  %1 = udiv i32 %li32, %arg32
@@ -144,6 +330,12 @@ define void @udiv(i32 %arg32, i64 %arg64
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 21 for instruction:  %4 = udiv i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %5 = udiv i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 10 for instruction:   %6 = udiv i64 %li64_3, undef
 }
 
 define void @and() {
@@ -161,6 +353,16 @@ define void @and() {
   %li64_1 = load i64, i64* undef
   and i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr_0 = trunc i64 %li64_2 to i32
+  and i32 %tr_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li64_3 = load i64, i64* undef
+  %tr_1 = trunc i64 %li64_3 to i32
+  and i64 %li64_3, undef
+
   ret void;
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = and i32 %li32, undef
@@ -172,6 +374,12 @@ define void @and() {
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = and i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %5 = and i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %6 = and i64 %li64_3, undef
 }
 
 define void @or() {
@@ -189,6 +397,16 @@ define void @or() {
   %li64_1 = load i64, i64* undef
   or i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr_0 = trunc i64 %li64_2 to i32
+  or i32 %tr_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li64_3 = load i64, i64* undef
+  %tr_1 = trunc i64 %li64_3 to i32
+  or i64 %li64_3, undef
+
   ret void;
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = or i32 %li32, undef
@@ -200,6 +418,12 @@ define void @or() {
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = or i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %5 = or i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %6 = or i64 %li64_3, undef
 }
 
 define void @xor() {
@@ -217,6 +441,16 @@ define void @xor() {
   %li64_1 = load i64, i64* undef
   xor i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr_0 = trunc i64 %li64_2 to i32
+  xor i32 %tr_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li64_3 = load i64, i64* undef
+  %tr_1 = trunc i64 %li64_3 to i32
+  xor i64 %li64_3, undef
+
   ret void;
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = xor i32 %li32, undef
@@ -228,6 +462,12 @@ define void @xor() {
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = xor i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %5 = xor i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %6 = xor i64 %li64_3, undef
 }
 
 define void @icmp() {
@@ -245,6 +485,16 @@ define void @icmp() {
   %li64_1 = load i64, i64* undef
   icmp eq i64 %li64_0, %li64_1
 
+  ; Truncated load
+  %li64_2 = load i64, i64* undef
+  %tr_0 = trunc i64 %li64_2 to i32
+  icmp eq i32 %tr_0, undef
+
+  ; Loads with multiple uses are *not* folded
+  %li64_3 = load i64, i64* undef
+  %tr_1 = trunc i64 %li64_3 to i32
+  icmp eq i64 %li64_3, undef
+
   ret void;
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li32 = load i32, i32* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = icmp eq i32 %li32, undef
@@ -256,4 +506,10 @@ define void @icmp() {
 ; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_0 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_1 = load i64, i64* undef
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = icmp eq i64 %li64_0, %li64_1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_0 = trunc i64 %li64_2 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %5 = icmp eq i32 %tr_0, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction:   %tr_1 = trunc i64 %li64_3 to i32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %6 = icmp eq i64 %li64_3, undef
 }




More information about the llvm-commits mailing list