[llvm] r173849 - Adding simple cast cost to ARM
Renato Golin
renato.golin at linaro.org
Tue Jan 29 15:31:38 PST 2013
Author: rengolin
Date: Tue Jan 29 17:31:38 2013
New Revision: 173849
URL: http://llvm.org/viewvc/llvm-project?rev=173849&view=rev
Log:
Adding simple cast cost to ARM
Changing ARMBaseTargetMachine to return ARMTargetLowering intead of
the generic one (similar to x86 code).
Tests showing which instructions were added to cast when necessary
or cost zero when not. Downcast to 16 bits are not lowered in NEON,
so costs are not there yet.
Added:
llvm/trunk/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMTargetMachine.h
llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.h?rev=173849&r1=173848&r2=173849&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.h Tue Jan 29 17:31:38 2013
@@ -46,6 +46,10 @@ public:
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ // Implemented by derived classes
+ llvm_unreachable("getTargetLowering not implemented");
+ }
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp?rev=173849&r1=173848&r2=173849&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp Tue Jan 29 17:31:38 2013
@@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
using namespace llvm;
// Declare the pass initialization routine locally as target-specific passes
@@ -34,18 +35,20 @@ namespace {
class ARMTTI : public ImmutablePass, public TargetTransformInfo {
const ARMBaseTargetMachine *TM;
const ARMSubtarget *ST;
+ const ARMTargetLowering *TLI;
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the result needs to be inserted and/or extracted from vectors.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
- ARMTTI() : ImmutablePass(ID), TM(0), ST(0) {
+ ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
llvm_unreachable("This pass cannot be directly constructed");
}
ARMTTI(const ARMBaseTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) {
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
initializeARMTTIPass(*PassRegistry::getPassRegistry());
}
@@ -111,6 +114,9 @@ public:
return 1;
}
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+
/// @}
};
@@ -157,3 +163,37 @@ unsigned ARMTTI::getIntImmCost(const API
}
return 2;
}
+
+unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ // Some arithmetic, load and store operations have specific instructions
+ // to cast up/down their types automatically at no extra cost
+ // TODO: Get these tables to know at least what the related operations are
+ static const TypeConversionCostTblEntry<MVT> NEONConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ };
+
+ if (ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONConversionTbl,
+ array_lengthof(NEONConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONConversionTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
Added: llvm/trunk/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll?rev=173849&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll Tue Jan 29 17:31:38 2013
@@ -0,0 +1,114 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
+; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
+
+; ModuleID = 'arm.ll'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+%T216 = type <2 x i16>
+%T232 = type <2 x i32>
+%T264 = type <2 x i64>
+
+%T416 = type <4 x i16>
+%T432 = type <4 x i32>
+%T464 = type <4 x i64>
+
+define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'direct':
+ %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+ %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+ %r3 = mul %T432 %v0, %v1
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmul.i32
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'ups1632':
+ %v0 = load %T416* %loadaddr
+; ASM: vldr
+ %v1 = load %T416* %loadaddr2
+; ASM: vldr
+ %r1 = sext %T416 %v0 to %T432
+ %r2 = sext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
+ %r3 = mul %T432 %r1, %r2
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.s16
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'upu1632':
+ %v0 = load %T416* %loadaddr
+; ASM: vldr
+ %v1 = load %T416* %loadaddr2
+; ASM: vldr
+ %r1 = zext %T416 %v0 to %T432
+ %r2 = zext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
+ %r3 = mul %T432 %r1, %r2
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.u16
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'ups3264':
+ %v0 = load %T232* %loadaddr
+; ASM: vldr
+ %v1 = load %T232* %loadaddr2
+; ASM: vldr
+ %r3 = mul %T232 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+ %st = sext %T232 %r3 to %T264
+; ASM: vmovl.s32
+; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
+ store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'upu3264':
+ %v0 = load %T232* %loadaddr
+; ASM: vldr
+ %v1 = load %T232* %loadaddr2
+; ASM: vldr
+ %r3 = mul %T232 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+ %st = zext %T232 %r3 to %T264
+; ASM: vmovl.u32
+; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
+ store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+; COST: function 'dn3216':
+ %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+ %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+ %r3 = mul %T432 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+ %st = trunc %T432 %r3 to %T416
+; ASM: vmovn.i32
+; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
+ store %T416 %st, %T416* %storeaddr
+; ASM: vstr
+ ret void
+}
More information about the llvm-commits
mailing list