[llvm] r206141 - [PowerPC] [Constant Hoisting] Enable constant hoisting on PPC
Hal Finkel
hfinkel at anl.gov
Sun Apr 13 16:02:41 PDT 2014
Author: hfinkel
Date: Sun Apr 13 18:02:40 2014
New Revision: 206141
URL: http://llvm.org/viewvc/llvm-project?rev=206141&view=rev
Log:
[PowerPC] [Constant Hoisting] Enable constant hoisting on PPC
Implements the various TTI functions to enable constant hoisting on PPC. The
only significant test-suite change is this:
MultiSource/Benchmarks/VersaBench/bmm/bmm - 20% speedup
(which essentially reverses the slowdown from r206120).
Added:
llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/
llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp?rev=206141&r1=206140&r2=206141&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetTransformInfo.cpp Sun Apr 13 18:02:40 2014
@@ -18,11 +18,15 @@
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
+cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
+
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
@@ -67,6 +71,13 @@ public:
/// \name Scalar TTI Implementations
/// @{
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+
virtual PopcntSupportKind
getPopcntSupport(unsigned TyWidth) const override;
virtual void getUnrollingPreferences(
@@ -123,6 +134,142 @@ PPCTTI::PopcntSupportKind PPCTTI::getPop
return PSK_Software;
}
+unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { // Cost of materializing the integer constant Imm.
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(Imm, Ty);
+ // The flag above defers to the base TargetTransformInfo implementation.
+ assert(Ty->isIntegerTy());
+ // A type with a primitive size of zero bits is given the maximum cost.
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+ // Zero is free; other values are rated by their sign-extended width below.
+ if (Imm == 0)
+ return TCC_Free;
+ // Only immediates of at most 64 bits are inspected through get[SZ]ExtValue().
+ if (Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TCC_Basic;
+ // A signed 32-bit value costs one unit if its low 16 bits are clear, else two.
+ if (isInt<32>(Imm.getSExtValue())) {
+ // A constant that can be materialized using lis.
+ if ((Imm.getZExtValue() & 0xFFFF) == 0)
+ return TCC_Basic;
+
+ return 2 * TCC_Basic;
+ }
+ }
+ // Anything wider than a signed 32-bit value is rated at four units.
+ return 4 * TCC_Basic;
+}
+/// Returns the cost of using Imm as operand Idx of a call to intrinsic IID.
+unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
+ // The flag above defers to the base TargetTransformInfo implementation.
+ assert(Ty->isIntegerTy());
+ // A type with a primitive size of zero bits is given the maximum cost.
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+ // Unlisted intrinsics are free; the *_with_overflow ones take a signed 16-bit operand 1 for free.
+ switch (IID) {
+ default: return TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
+ return TCC_Free;
+ break;
+ }
+ return PPCTTI::getIntImmCost(Imm, Ty);
+}
+/// Returns the cost of using Imm as operand Idx of an instruction with opcode Opcode.
+unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
+ // The flag above defers to the base TargetTransformInfo implementation.
+ assert(Ty->isIntegerTy());
+ // A type with a primitive size of zero bits is given the maximum cost.
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+ // ImmIdx is the operand index that may hold a free immediate; the flags pick which immediate forms are free.
+ unsigned ImmIdx = ~0U;
+ bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
+ ZeroFree = false;
+ switch (Opcode) {
+ default: return TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TCC_Basic;
+ return TCC_Free;
+ case Instruction::And:
+ RunFree = true; // (for the rotate-and-mask instructions)
+ // Fallthrough...
+ case Instruction::Add:
+ case Instruction::Or:
+ case Instruction::Xor:
+ ShiftedFree = true;
+ // Fallthrough...
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ ImmIdx = 1;
+ break;
+ case Instruction::ICmp:
+ UnsignedFree = true;
+ ImmIdx = 1;
+ // Fallthrough... (zero comparisons can use record-form instructions)
+ case Instruction::Select:
+ ZeroFree = true;
+ break;
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Ret:
+ case Instruction::Load:
+ case Instruction::Store:
+ break;
+ }
+ // Opcodes that set ZeroFree (ICmp and Select) never pay for a zero, whatever the operand index.
+ if (ZeroFree && Imm == 0)
+ return TCC_Free;
+ // ImmIdx is left at ~0U by PHI, Call, Ret, Load, Store and Select, so only Idx == ~0U could match for them.
+ if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TCC_Free;
+ // And only: an immediate accepted by isShiftedMask_32/64, or whose complement is, is free.
+ if (RunFree) {
+ if (Imm.getBitWidth() <= 32 &&
+ (isShiftedMask_32(Imm.getZExtValue()) ||
+ isShiftedMask_32(~Imm.getZExtValue())))
+ return TCC_Free;
+ // The 64-bit mask check applies only when the subtarget reports isPPC64().
+
+ if (ST->isPPC64() &&
+ (isShiftedMask_64(Imm.getZExtValue()) ||
+ isShiftedMask_64(~Imm.getZExtValue())))
+ return TCC_Free;
+ }
+ // ICmp only: an unsigned 16-bit immediate is also free.
+ if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
+ return TCC_Free;
+ // And/Add/Or/Xor only: an immediate whose low 16 bits are zero is free.
+ if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
+ return TCC_Free;
+ }
+ // Not free in this position: fall back to the cost of materializing Imm.
+ return PPCTTI::getIntImmCost(Imm, Ty);
+}
+
void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
if (ST->getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
Added: llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll?rev=206141&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll Sun Apr 13 18:02:40 2014
@@ -0,0 +1,23 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%T = type { i32, i32, i32, i32 }
+
+; Test if even cheap base addresses are hoisted.
+define i32 @test1() nounwind {
+; CHECK-LABEL: @test1
+; CHECK: %const = bitcast i32 12345678 to i32
+; CHECK: %1 = inttoptr i32 %const to %T*
+; CHECK: %addr1 = getelementptr %T* %1, i32 0, i32 1
+ %addr1 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1
+ %tmp1 = load i32* %addr1
+ %addr2 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2
+ %tmp2 = load i32* %addr2
+ %addr3 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3
+ %tmp3 = load i32* %addr3
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+}
+
Added: llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg?rev=206141&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg Sun Apr 13 18:02:40 2014
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+ config.unsupported = True
+
Added: llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll?rev=206141&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll Sun Apr 13 18:02:40 2014
@@ -0,0 +1,66 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Here the masks are all contiguous, and should not be hoisted.
+define i32 @test1() nounwind {
+entry:
+; CHECK-LABEL: @test1
+; CHECK-NOT: bitcast i32 65535 to i32
+; CHECK: and i32 undef, 65535
+ %conv121 = and i32 undef, 65535
+ br i1 undef, label %if.then152, label %if.end167
+
+if.then152:
+; CHECK: and i32 undef, 65535
+ %conv153 = and i32 undef, 65535
+ br i1 undef, label %if.end167, label %end2
+
+if.end167:
+; CHECK: and i32 {{.*}}, 32768
+ %shl161 = shl nuw nsw i32 %conv121, 15
+ %0 = load i8* undef, align 1
+ %conv169 = zext i8 %0 to i32
+ %shl170 = shl nuw nsw i32 %conv169, 7
+ %shl161.masked = and i32 %shl161, 32768
+ %conv174 = or i32 %shl170, %shl161.masked
+ %cmp178 = icmp ugt i32 %conv174, 32767
+ br i1 %cmp178, label %end1, label %end2
+
+end1:
+ unreachable
+
+end2:
+ unreachable
+}
+
+; Here the masks are not contiguous, and should be hoisted.
+define i32 @test2() nounwind {
+entry:
+; CHECK-LABEL: @test2
+; CHECK: bitcast i32 65531 to i32
+ %conv121 = and i32 undef, 65531
+ br i1 undef, label %if.then152, label %if.end167
+
+if.then152:
+ %conv153 = and i32 undef, 65531
+ br i1 undef, label %if.end167, label %end2
+
+if.end167:
+; CHECK: add i32 {{.*}}, -32758
+ %shl161 = shl nuw nsw i32 %conv121, 15
+ %0 = load i8* undef, align 1
+ %conv169 = zext i8 %0 to i32
+ %shl170 = shl nuw nsw i32 %conv169, 7
+ %shl161.masked = and i32 %shl161, 32773
+ %conv174 = or i32 %shl170, %shl161.masked
+ %cmp178 = icmp ugt i32 %conv174, 32767
+ br i1 %cmp178, label %end1, label %end2
+
+end1:
+ unreachable
+
+end2:
+ unreachable
+}
+
More information about the llvm-commits
mailing list