[llvm] [SystemZ] i128 cost model (PR #78528)
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 18 09:20:07 PST 2024
https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/78528
>From 4b80902a18d5467412a7580f096fc8929e8fe98e Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Wed, 17 Jan 2024 17:33:03 -0600
Subject: [PATCH 1/3] Mainline i128 cost model
---
.../SystemZ/SystemZTargetTransformInfo.cpp | 133 +++++++++----
.../SystemZ/SystemZTargetTransformInfo.h | 2 +
.../CostModel/SystemZ/i128-cmp-ext-conv.ll | 185 ++++++++++++++++++
.../Analysis/CostModel/SystemZ/int-arith.ll | 6 +
.../Analysis/CostModel/SystemZ/intrinsics.ll | 9 +
.../CostModel/SystemZ/load-to-trunc.ll | 27 ---
.../Analysis/CostModel/SystemZ/load_store.ll | 72 ++++---
.../Analysis/CostModel/SystemZ/logic-i128.ll | 48 +++++
.../Analysis/CostModel/SystemZ/logical.ll | 12 ++
9 files changed, 404 insertions(+), 90 deletions(-)
create mode 100644 llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
delete mode 100644 llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll
create mode 100644 llvm/test/Analysis/CostModel/SystemZ/logic-i128.ll
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index e21d3090ba2fd1a..d69ff9e96c3e473 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -75,8 +75,8 @@ InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
return TTI::TCC_Free;
- // No cost model for operations on integers larger than 64 bit implemented yet.
- if (BitSize > 64)
+ // No cost model for operations on integers larger than 128 bit implemented yet.
+ if ((!ST->hasVector() && BitSize > 64) || BitSize > 128)
return TTI::TCC_Free;
if (Imm == 0)
@@ -96,7 +96,8 @@ InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
return 2 * TTI::TCC_Basic;
}
- return 4 * TTI::TCC_Basic;
+ // i128 immediates are loaded from the Constant Pool.
+ return 2 * TTI::TCC_Basic;
}
InstructionCost SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
@@ -466,6 +467,8 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
}
}
+ unsigned ImmLoadCost = 0;
+
if (!Ty->isVectorTy()) {
// These FP operations are supported with a dedicated instruction for
// float, double and fp128 (base implementation assumes float generally
@@ -478,30 +481,43 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
if (Opcode == Instruction::FRem)
return LIBCALL_COST;
+ // Most i128 immediates must be loaded from the constant pool.
+ if (Ty->isIntegerTy(128))
+ for (const Value *A : Args)
+ if (auto *C = dyn_cast<ConstantInt>(A))
+ if (Opcode != Instruction::Xor || !C->isAllOnesValue())
+ ImmLoadCost++;
+
// Give discount for some combined logical operations if supported.
- if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+ if (Args.size() == 2) {
if (Opcode == Instruction::Xor) {
for (const Value *A : Args) {
if (const Instruction *I = dyn_cast<Instruction>(A))
if (I->hasOneUse() &&
- (I->getOpcode() == Instruction::And ||
- I->getOpcode() == Instruction::Or ||
+ (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And ||
I->getOpcode() == Instruction::Xor))
- return 0;
+ if ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
+ (isInt128InVR(Ty) &&
+ (I->getOpcode() == Instruction::Or || ST->hasVectorEnhancements1())))
+ return 0 + ImmLoadCost;
}
}
- else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+ else if (Opcode == Instruction::And || Opcode == Instruction::Or) {
for (const Value *A : Args) {
if (const Instruction *I = dyn_cast<Instruction>(A))
- if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
- return 0;
+ if ((I->hasOneUse() && I->getOpcode() == Instruction::Xor) &&
+ ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
+ (isInt128InVR(Ty) &&
+ (Opcode == Instruction::And || ST->hasVectorEnhancements1()))))
+ return 0 + ImmLoadCost;
}
}
}
// Or requires one instruction, although it has custom handling for i64.
if (Opcode == Instruction::Or)
- return 1;
+ return 1 + ImmLoadCost;
if (Opcode == Instruction::Xor && ScalarBits == 1) {
if (ST->hasLoadStoreOnCond2())
@@ -589,7 +605,7 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
// Fallback to the default implementation.
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
- Args, CxtI);
+ Args, CxtI) + ImmLoadCost;
}
InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
@@ -774,29 +790,63 @@ InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
assert (!Dst->isVectorTy());
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+ if (Src->isIntegerTy(128))
+ return LIBCALL_COST;
if (SrcScalarBits >= 32 ||
(I != nullptr && isa<LoadInst>(I->getOperand(0))))
return 1;
return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
}
- if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- Src->isIntegerTy(1)) {
- if (ST->hasLoadStoreOnCond2())
- return 2; // li 0; loc 1
-
- // This should be extension of a compare i1 result, which is done with
- // ipm and a varying sequence of instructions.
- unsigned Cost = 0;
- if (Opcode == Instruction::SExt)
- Cost = (DstScalarBits < 64 ? 3 : 4);
- if (Opcode == Instruction::ZExt)
- Cost = 3;
- Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
- if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
- // If operands of an fp-type was compared, this costs +1.
- Cost++;
- return Cost;
+ if ((Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) &&
+ Dst->isIntegerTy(128))
+ return LIBCALL_COST;
+
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt)) {
+ if (Src->isIntegerTy(1)) {
+ if (DstScalarBits == 128)
+ return 5 /*branch seq.*/;
+
+ if (ST->hasLoadStoreOnCond2())
+ return 2; // li 0; loc 1
+
+ // This should be extension of a compare i1 result, which is done with
+ // ipm and a varying sequence of instructions.
+ unsigned Cost = 0;
+ if (Opcode == Instruction::SExt)
+ Cost = (DstScalarBits < 64 ? 3 : 4);
+ if (Opcode == Instruction::ZExt)
+ Cost = 3;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+ if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+ // If operands of an fp-type were compared, this costs +1.
+ Cost++;
+ return Cost;
+ }
+ else if (isInt128InVR(Dst)) {
+ // Extensions from GPR to i128 (in VR) typically cost two instructions,
+ // but a zero-extending load would be just one extra instruction.
+ if (Opcode == Instruction::ZExt && I != nullptr)
+ if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (Ld->hasOneUse())
+ return 1;
+ return 2;
+ }
+ }
+
+ if (Opcode == Instruction::Trunc && isInt128InVR(Src) && I != nullptr) {
+ if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (Ld->hasOneUse())
+ return 0; // Will be converted to GPR load.
+ bool OnlyTruncatingStores = true;
+ for (const User *U : I->users())
+ if (!isa<StoreInst>(U)) {
+ OnlyTruncatingStores = false;
+ break;
+ }
+ if (OnlyTruncatingStores)
+ return 0;
+ return 2; // Vector element extraction.
}
}
else if (ST->hasVector()) {
@@ -930,7 +980,7 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// A loaded value compared with 0 with multiple users becomes Load and
// Test. The load is then not foldable, so return 0 cost for the ICmp.
unsigned ScalarBits = ValTy->getScalarSizeInBits();
- if (I != nullptr && ScalarBits >= 32)
+ if (I != nullptr && (ScalarBits == 32 || ScalarBits == 64))
if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
@@ -940,11 +990,21 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
unsigned Cost = 1;
if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
+ if (isInt128InVR(ValTy) && I != nullptr &&
+ isa<ConstantInt>(I->getOperand(1)))
+ Cost++;
return Cost;
}
case Instruction::Select:
if (ValTy->isFloatingPointTy())
return 4; // No load on condition for FP - costs a conditional jump.
+ if (I != nullptr && isInt128InVR(ValTy)) {
+ unsigned ImmLoadCost = 0;
+ if (isa<ConstantInt>(I->getOperand(1)) ||
+ isa<ConstantInt>(I->getOperand(2)))
+ ImmLoadCost++;
+ return 4 + ImmLoadCost;
+ }
return 1; // Load On Condition / Select Register.
}
}
@@ -1157,6 +1217,15 @@ InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
+ // Storing an i128 constant requires load from Constant Pool.
+ if (isInt128InVR(Src) && Opcode == Instruction::Store && I != nullptr &&
+ isa<ConstantInt>(I->getOperand(0)))
+ return 2;
+
+ // FP128 is a legal type but kept in a register pair on older CPUs.
+ if (Src->isFP128Ty() && !ST->hasVectorEnhancements1())
+ return 2;
+
unsigned NumOps =
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
@@ -1177,10 +1246,6 @@ InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
}
- if (Src->getScalarSizeInBits() == 128)
- // 128 bit scalars are held in a pair of two 64 bit registers.
- NumOps *= 2;
-
return NumOps;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 62c59ddc3f06a39..2cccdf6d17dacf4 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -28,6 +28,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
unsigned const LIBCALL_COST = 30;
+ bool isInt128InVR(Type *Ty) { return Ty->isIntegerTy(128) && ST->hasVector(); }
+
public:
explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
diff --git a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
new file mode 100644
index 000000000000000..d3e60c7df51e5a1
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
@@ -0,0 +1,185 @@
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+;
+
+define i128 @fun1(i128 %val1, i128 %val2) {
+; CHECK-LABEL: 'fun1'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v128 = sext i1 %cmp to i128
+ %cmp = icmp eq i128 %val1, %val2
+ %v128 = sext i1 %cmp to i128
+ ret i128 %v128
+}
+
+define i128 @fun2(i128 %val1, i128 %val2) {
+; CHECK-LABEL: 'fun2'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v128 = zext i1 %cmp to i128
+ %cmp = icmp eq i128 %val1, %val2
+ %v128 = zext i1 %cmp to i128
+ ret i128 %v128
+}
+
+define i128 @fun3(i128 %val1, i128 %val2,
+ i128 %val3, i128 %val4) {
+; CHECK-LABEL: 'fun3'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i128 %val3, %val4
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add
+ %cmp = icmp eq i128 %val1, %val2
+ %add = add i128 %val3, %val4
+ %sel = select i1 %cmp, i128 %val3, i128 %add
+ ret i128 %sel
+}
+
+
+define i128 @fun3_b(i128 %val1) {
+; CHECK-LABEL: 'fun3_b'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %cmp = icmp eq i128 %val1, 123
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %sel = select i1 %cmp, i128 %val1, i128 456
+ %cmp = icmp eq i128 %val1, 123
+ %sel = select i1 %cmp, i128 %val1, i128 456
+ ret i128 %sel
+}
+
+define i128 @fun3_c(i128 %val1) {
+; CHECK-LABEL: 'fun3_c'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %cmp = icmp eq i128 %val1, 123
+; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %sel = select i1 %cmp, i128 567, i128 456
+ %cmp = icmp eq i128 %val1, 123
+ %sel = select i1 %cmp, i128 567, i128 456
+ ret i128 %sel
+}
+
+define i128 @fun4(ptr %src) {
+; CHECK-LABEL: 'fun4'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = sext i64 %v to i128
+ %v = load i64, ptr %src, align 8
+ %res = sext i64 %v to i128
+ ret i128 %res
+}
+
+define i128 @fun5(i64 %lhs, i64 %rhs) {
+; CHECK-LABEL: 'fun5'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = sext i64 %v to i128
+ %v = add i64 %lhs, %rhs
+ %res = sext i64 %v to i128
+ ret i128 %res
+}
+
+define i128 @fun6(ptr %src) {
+; CHECK-LABEL: 'fun6'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res = zext i64 %v to i128
+ %v = load i64, ptr %src, align 8
+ %res = zext i64 %v to i128
+ ret i128 %res
+}
+
+define i128 @fun7(i64 %lhs, i64 %rhs) {
+; CHECK-LABEL: 'fun7'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = zext i64 %v to i128
+ %v = add i64 %lhs, %rhs
+ %res = zext i64 %v to i128
+ ret i128 %res
+}
+
+; Truncating store is free.
+define void @fun8(i128 %lhs, i128 %rhs, ptr %dst) {
+; CHECK-LABEL: 'fun8'
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %t = trunc i128 %v to i64
+ %v = add i128 %lhs, %rhs
+ %t = trunc i128 %v to i64
+ store i64 %t, ptr %dst, align 8
+ ret void
+}
+
+; If there is a non-store user, an extraction is needed.
+define i64 @fun9(i128 %lhs, i128 %rhs, ptr %dst) {
+; CHECK-LABEL: 'fun9'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %t = trunc i128 %v to i64
+ %v = add i128 %lhs, %rhs
+ %t = trunc i128 %v to i64
+ store i64 %t, ptr %dst, align 8
+ ret i64 %t
+}
+
+; Truncation of load is free.
+define i64 @fun10(ptr %src) {
+; CHECK-LABEL: 'fun10'
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %t = trunc i128 %v to i64
+ %v = load i128, ptr %src, align 8
+ %t = trunc i128 %v to i64
+ ret i64 %t
+}
+
+; If the load has another user, the truncation becomes an extract.
+define i64 @fun11(ptr %src, i128 %val2, ptr %dst) {
+; CHECK-LABEL: 'fun11'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %t = trunc i128 %v to i64
+ %v = load i128, ptr %src, align 8
+ %t = trunc i128 %v to i64
+ %a = add i128 %v, %val2
+ store i128 %a, ptr %dst
+ ret i64 %t
+}
+
+; Truncation with a GPR use typically requires an extraction.
+define i64 @fun12(i128 %lhs, i128 %rhs) {
+; CHECK-LABEL: 'fun12'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %t = trunc i128 %v to i64
+ %v = add i128 %lhs, %rhs
+ %t = trunc i128 %v to i64
+ ret i64 %t
+}
+
+; Fp<->Int conversions require libcalls.
+define void @fun13() {
+; CHECK-LABEL: 'fun13'
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v0 = fptosi fp128 undef to i128
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v1 = fptosi double undef to i128
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v2 = fptosi float undef to i128
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v3 = fptoui fp128 undef to i128
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v4 = fptoui double undef to i128
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v5 = fptoui float undef to i128
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v6 = sitofp i128 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v7 = sitofp i128 undef to double
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v8 = sitofp i128 undef to float
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v9 = uitofp i128 undef to fp128
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v10 = uitofp i128 undef to double
+; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v11 = uitofp i128 undef to float
+ %v0 = fptosi fp128 undef to i128
+ %v1 = fptosi double undef to i128
+ %v2 = fptosi float undef to i128
+ %v3 = fptoui fp128 undef to i128
+ %v4 = fptoui double undef to i128
+ %v5 = fptoui float undef to i128
+ %v6 = sitofp i128 undef to fp128
+ %v7 = sitofp i128 undef to double
+ %v8 = sitofp i128 undef to float
+ %v9 = uitofp i128 undef to fp128
+ %v10 = uitofp i128 undef to double
+ %v11 = uitofp i128 undef to float
+ ret void
+}
+
+; All i128 immediates (big and small) are loaded from the constant pool.
+define void @fun14(ptr %dst) {
+; CHECK-LABEL: 'fun14'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store i128 166153499473114484112, ptr %dst, align 8
+ store i128 166153499473114484112, ptr %dst, align 8
+ ret void
+}
+
+define void @fun15(ptr %dst) {
+; CHECK-LABEL: 'fun15'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store i128 123, ptr %dst, align 8
+ store i128 123, ptr %dst, align 8
+ ret void
+}
+
+define void @fun16(ptr %dst, i128 %val1) {
+; CHECK-LABEL: 'fun16'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = add i128 %val1, 123
+ %res = add i128 %val1, 123
+ store i128 %res, ptr %dst, align 8
+ ret void
+}
diff --git a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll
index 71863b923ca38b3..fc4d19c5cdf9e58 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll
@@ -8,6 +8,7 @@ define void @add() {
%res1 = add i16 undef, undef
%res2 = add i32 undef, undef
%res3 = add i64 undef, undef
+ %resQ = add i128 undef, undef
%res4 = add <2 x i8> undef, undef
%res5 = add <2 x i16> undef, undef
%res6 = add <2 x i32> undef, undef
@@ -29,6 +30,7 @@ define void @add() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = add i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = add i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = add i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = add i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = add <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = add <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = add <2 x i32> undef, undef
@@ -54,6 +56,7 @@ define void @sub() {
%res1 = sub i16 undef, undef
%res2 = sub i32 undef, undef
%res3 = sub i64 undef, undef
+ %resQ = sub i128 undef, undef
%res4 = sub <2 x i8> undef, undef
%res5 = sub <2 x i16> undef, undef
%res6 = sub <2 x i32> undef, undef
@@ -75,6 +78,7 @@ define void @sub() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = sub i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = sub i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = sub i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = sub i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = sub <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = sub <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = sub <2 x i32> undef, undef
@@ -100,6 +104,7 @@ define void @mul() {
%res1 = mul i16 undef, undef
%res2 = mul i32 undef, undef
%res3 = mul i64 undef, undef
+ %resQ = mul i128 undef, undef
%res4 = mul <2 x i8> undef, undef
%res5 = mul <2 x i16> undef, undef
%res6 = mul <2 x i32> undef, undef
@@ -121,6 +126,7 @@ define void @mul() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = mul i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = mul i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = mul i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = mul <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = mul <2 x i32> undef, undef
diff --git a/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll b/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll
index d3e07fa9735b328..032b78099c57126 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll
@@ -3,6 +3,13 @@
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z15 \
; RUN: | FileCheck %s -check-prefixes=CHECK,Z15
+define void @bswap_i128(i128 %arg) {
+; CHECK: function 'bswap_i128'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp = tail call i128 @llvm.bswap.i128(i128 %arg)
+ %swp = tail call i128 @llvm.bswap.i128(i128 %arg)
+ ret void
+}
+
define void @bswap_i64(i64 %arg, <2 x i64> %arg2) {
; CHECK: function 'bswap_i64'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i64
@@ -186,6 +193,8 @@ define void @bswap_v8i16_mem(ptr %src, <8 x i16> %arg, ptr %dst) {
ret void
}
+declare i128 @llvm.bswap.i128(i128)
+
declare i64 @llvm.bswap.i64(i64)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
diff --git a/llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll b/llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll
deleted file mode 100644
index cd6af575ea9ec3d..000000000000000
--- a/llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; Check memory cost model action for a load of an unusually sized integer
-; follow by and a trunc to a register sized integer gives a cost of 1 rather
-; than the expanded cost if it is not. This target does not currently perform
-; the expansion in the cost modelling.
-; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=systemz-unknown < %s | FileCheck %s --check-prefix=CHECK
-
-; Check that cost is 1 for unusual load to register sized load.
-define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
-; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
-;
- %out = load i128, ptr %ptr
- %trunc = trunc i128 %out to i32
- ret i32 %trunc
-}
-
-define i128 @loadUnusualInteger(ptr %ptr) {
-; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
-;
- %out = load i128, ptr %ptr
- ret i128 %out
-}
diff --git a/llvm/test/Analysis/CostModel/SystemZ/load_store.ll b/llvm/test/Analysis/CostModel/SystemZ/load_store.ll
index 1766dd3b2859e65..4d36c9ed421e08e 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/load_store.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/load_store.ll
@@ -1,10 +1,13 @@
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=zEC12 | FileCheck %s --check-prefixes=CHECK,ZEC12
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s --check-prefixes=CHECK,Z13
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z14 | FileCheck %s --check-prefixes=CHECK,Z14
define void @store() {
store i8 undef, ptr undef
store i16 undef, ptr undef
store i32 undef, ptr undef
store i64 undef, ptr undef
+ store i128 undef, ptr undef
store float undef, ptr undef
store double undef, ptr undef
store fp128 undef, ptr undef
@@ -37,9 +40,14 @@ define void @store() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef
+; ZEC12: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, ptr undef
+; Z14: Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store fp128 undef, ptr undef
+; ZEC12: Cost Model: Found an estimated cost of 2 for instruction: store fp128 undef, ptr undef
+; Z13: Cost Model: Found an estimated cost of 2 for instruction: store fp128 undef, ptr undef
+; Z14: Cost Model: Found an estimated cost of 1 for instruction: store fp128 undef, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef
@@ -73,6 +81,7 @@ define void @load() {
load i16, ptr undef
load i32, ptr undef
load i64, ptr undef
+ load i128, ptr undef
load float, ptr undef
load double, ptr undef
load fp128, ptr undef
@@ -105,33 +114,38 @@ define void @load() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = load float, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = load double, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %7 = load fp128, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x float>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <2 x double>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <4 x i8>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i16>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i32>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <4 x i64>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <4 x double>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <8 x i8>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i32>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <8 x i64>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %24 = load <8 x float>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %25 = load <8 x double>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <16 x i8>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <16 x i16>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %28 = load <16 x i32>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <16 x i64>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x float>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %31 = load <16 x double>, ptr undef
+; ZEC12: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, ptr undef
+; Z14: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef
+; ZEC12: Cost Model: Found an estimated cost of 2 for instruction: %8 = load fp128, ptr undef
+; Z13: Cost Model: Found an estimated cost of 2 for instruction: %8 = load fp128, ptr undef
+; Z14: Cost Model: Found an estimated cost of 1 for instruction: %8 = load fp128, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i8>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i16>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i32>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i64>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <2 x float>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x double>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i8>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i16>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <4 x i32>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <4 x i64>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <4 x float>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <4 x double>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i8>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i16>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <8 x i32>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %24 = load <8 x i64>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <8 x float>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <8 x double>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <16 x i8>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %29 = load <16 x i32>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %30 = load <16 x i64>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <16 x float>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %32 = load <16 x double>, ptr undef
ret void;
}
diff --git a/llvm/test/Analysis/CostModel/SystemZ/logic-i128.ll b/llvm/test/Analysis/CostModel/SystemZ/logic-i128.ll
new file mode 100644
index 000000000000000..f4c4fceed717f39
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/SystemZ/logic-i128.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,Z13
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z14 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,Z14
+
+define void @fun(i128 %a) {
+; CHECK-LABEL: 'fun'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i128 %l0, -1
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i128 %a, %c0
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i128 %a, %c0
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i128 %l1, -1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i128 %a, %c1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i128 %l2, %a
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i128 %c2, -1
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i128 %c2, -1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i128 %l3, %a
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i128 %c3, -1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i128 %l4, %a
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i128 %c4, -1
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i128 %c4, -1
+;
+ %l0 = load i128, ptr undef
+ %c0 = xor i128 %l0, -1
+ %res0 = or i128 %a, %c0
+ store i128 %res0, ptr undef
+
+ %l1 = load i128, ptr undef
+ %c1 = xor i128 %l1, -1
+ %res1 = and i128 %a, %c1
+ store i128 %res1, ptr undef
+
+ %l2 = load i128, ptr undef
+ %c2 = and i128 %l2, %a
+ %res2 = xor i128 %c2, -1
+ store i128 %res2, ptr undef
+
+ %l3 = load i128, ptr undef
+ %c3 = or i128 %l3, %a
+ %res3 = xor i128 %c3, -1
+ store i128 %res3, ptr undef
+
+ %l4 = load i128, ptr undef
+ %c4 = xor i128 %l4, %a
+ %res4 = xor i128 %c4, -1
+ store i128 %res4, ptr undef
+
+ ret void
+}
diff --git a/llvm/test/Analysis/CostModel/SystemZ/logical.ll b/llvm/test/Analysis/CostModel/SystemZ/logical.ll
index 29935d6895fc05a..c87a3836ded6bd6 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/logical.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/logical.ll
@@ -5,6 +5,7 @@ define void @and() {
%res1 = and i16 undef, undef
%res2 = and i32 undef, undef
%res3 = and i64 undef, undef
+ %resQ = and i128 undef, undef
%res4 = and <2 x i8> undef, undef
%res5 = and <2 x i16> undef, undef
%res6 = and <2 x i32> undef, undef
@@ -26,6 +27,7 @@ define void @and() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = and i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = and i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = and i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = and <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = and <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = and <2 x i32> undef, undef
@@ -51,6 +53,7 @@ define void @ashr() {
%res1 = ashr i16 undef, undef
%res2 = ashr i32 undef, undef
%res3 = ashr i64 undef, undef
+ %resQ = ashr i128 undef, undef
%res4 = ashr <2 x i8> undef, undef
%res5 = ashr <2 x i16> undef, undef
%res6 = ashr <2 x i32> undef, undef
@@ -72,6 +75,7 @@ define void @ashr() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = ashr i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = ashr i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = ashr i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = ashr i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = ashr <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = ashr <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = ashr <2 x i32> undef, undef
@@ -97,6 +101,7 @@ define void @lshr() {
%res1 = lshr i16 undef, undef
%res2 = lshr i32 undef, undef
%res3 = lshr i64 undef, undef
+ %resQ = lshr i128 undef, undef
%res4 = lshr <2 x i8> undef, undef
%res5 = lshr <2 x i16> undef, undef
%res6 = lshr <2 x i32> undef, undef
@@ -118,6 +123,7 @@ define void @lshr() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = lshr i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = lshr i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = lshr i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = lshr i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = lshr <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = lshr <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = lshr <2 x i32> undef, undef
@@ -143,6 +149,7 @@ define void @or() {
%res1 = or i16 undef, undef
%res2 = or i32 undef, undef
%res3 = or i64 undef, undef
+ %resQ = or i128 undef, undef
%res4 = or <2 x i8> undef, undef
%res5 = or <2 x i16> undef, undef
%res6 = or <2 x i32> undef, undef
@@ -164,6 +171,7 @@ define void @or() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = or i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = or i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = or i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = or i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = or <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = or <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = or <2 x i32> undef, undef
@@ -189,6 +197,7 @@ define void @shl() {
%res1 = shl i16 undef, undef
%res2 = shl i32 undef, undef
%res3 = shl i64 undef, undef
+ %resQ = shl i128 undef, undef
%res4 = shl <2 x i8> undef, undef
%res5 = shl <2 x i16> undef, undef
%res6 = shl <2 x i32> undef, undef
@@ -210,6 +219,7 @@ define void @shl() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = shl i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = shl i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = shl i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = shl i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = shl <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = shl <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = shl <2 x i32> undef, undef
@@ -235,6 +245,7 @@ define void @xor() {
%res1 = xor i16 undef, undef
%res2 = xor i32 undef, undef
%res3 = xor i64 undef, undef
+ %resQ = xor i128 undef, undef
%res4 = xor <2 x i8> undef, undef
%res5 = xor <2 x i16> undef, undef
%res6 = xor <2 x i32> undef, undef
@@ -256,6 +267,7 @@ define void @xor() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = xor i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i64 undef, undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = xor i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = xor <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = xor <2 x i32> undef, undef
>From c0c60f136cbd4e0f52ab00a7a18341b6cd5d4d68 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Thu, 18 Jan 2024 10:34:01 -0600
Subject: [PATCH 2/3] Remove the i128 immediate costs for now.
---
.../SystemZ/SystemZTargetTransformInfo.cpp | 36 +++-------------
.../CostModel/SystemZ/i128-cmp-ext-conv.ll | 42 -------------------
2 files changed, 6 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index d69ff9e96c3e473..9370fb51a96c569 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -467,8 +467,6 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
}
}
- unsigned ImmLoadCost = 0;
-
if (!Ty->isVectorTy()) {
// These FP operations are supported with a dedicated instruction for
// float, double and fp128 (base implementation assumes float generally
@@ -481,13 +479,6 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
if (Opcode == Instruction::FRem)
return LIBCALL_COST;
- // Most i128 immediates must be loaded from the constant pool.
- if (Ty->isIntegerTy(128))
- for (const Value *A : Args)
- if (auto *C = dyn_cast<ConstantInt>(A))
- if (Opcode != Instruction::Xor || !C->isAllOnesValue())
- ImmLoadCost++;
-
// Give discount for some combined logical operations if supported.
if (Args.size() == 2) {
if (Opcode == Instruction::Xor) {
@@ -500,7 +491,7 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
if ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
(isInt128InVR(Ty) &&
(I->getOpcode() == Instruction::Or || ST->hasVectorEnhancements1())))
- return 0 + ImmLoadCost;
+ return 0;
}
}
else if (Opcode == Instruction::And || Opcode == Instruction::Or) {
@@ -510,14 +501,14 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
(isInt128InVR(Ty) &&
(Opcode == Instruction::And || ST->hasVectorEnhancements1()))))
- return 0 + ImmLoadCost;
+ return 0;
}
}
}
// Or requires one instruction, although it has custom handling for i64.
if (Opcode == Instruction::Or)
- return 1 + ImmLoadCost;
+ return 1;
if (Opcode == Instruction::Xor && ScalarBits == 1) {
if (ST->hasLoadStoreOnCond2())
@@ -605,7 +596,7 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
// Fallback to the default implementation.
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
- Args, CxtI) + ImmLoadCost;
+ Args, CxtI);
}
InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
@@ -990,21 +981,11 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
unsigned Cost = 1;
if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
- if (isInt128InVR(ValTy) && I != nullptr &&
- isa<ConstantInt>(I->getOperand(1)))
- Cost++;
return Cost;
}
case Instruction::Select:
- if (ValTy->isFloatingPointTy())
- return 4; // No load on condition for FP - costs a conditional jump.
- if (I != nullptr && isInt128InVR(ValTy)) {
- unsigned ImmLoadCost = 0;
- if (isa<ConstantInt>(I->getOperand(1)) ||
- isa<ConstantInt>(I->getOperand(2)))
- ImmLoadCost++;
- return 4 + ImmLoadCost;
- }
+ if (ValTy->isFloatingPointTy() || isInt128InVR(ValTy))
+ return 4; // No LOC for FP / i128 - costs a conditional jump.
return 1; // Load On Condition / Select Register.
}
}
@@ -1217,11 +1198,6 @@ InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
- // Storing an i128 constant requires load from Constant Pool.
- if (isInt128InVR(Src) && Opcode == Instruction::Store && I != nullptr &&
- isa<ConstantInt>(I->getOperand(0)))
- return 2;
-
// FP128 is a legal type but kept in a register pair on older CPUs.
if (Src->isFP128Ty() && !ST->hasVectorEnhancements1())
return 2;
diff --git a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
index d3e60c7df51e5a1..66da6de3bc76813 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
@@ -31,25 +31,6 @@ define i128 @fun3(i128 %val1, i128 %val2,
ret i128 %sel
}
-
-define i128 @fun3_b(i128 %val1) {
-; CHECK-LABEL: 'fun3_b'
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %cmp = icmp eq i128 %val1, 123
-; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %sel = select i1 %cmp, i128 %val1, i128 456
- %cmp = icmp eq i128 %val1, 123
- %sel = select i1 %cmp, i128 %val1, i128 456
- ret i128 %sel
-}
-
-define i128 @fun3_c(i128 %val1) {
-; CHECK-LABEL: 'fun3_c'
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %cmp = icmp eq i128 %val1, 123
-; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %sel = select i1 %cmp, i128 567, i128 456
- %cmp = icmp eq i128 %val1, 123
- %sel = select i1 %cmp, i128 567, i128 456
- ret i128 %sel
-}
-
define i128 @fun4(ptr %src) {
; CHECK-LABEL: 'fun4'
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = sext i64 %v to i128
@@ -160,26 +141,3 @@ define void @fun13() {
%v11 = uitofp i128 undef to float
ret void
}
-
-; All i128 immediates (big and small) are loaded from the constant pool.
-define void @fun14(ptr %dst) {
-; CHECK-LABEL: 'fun14'
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store i128 166153499473114484112, ptr %dst, align 8
- store i128 166153499473114484112, ptr %dst, align 8
- ret void
-}
-
-define void @fun15(ptr %dst) {
-; CHECK-LABEL: 'fun15'
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store i128 123, ptr %dst, align 8
- store i128 123, ptr %dst, align 8
- ret void
-}
-
-define void @fun16(ptr %dst, i128 %val1) {
-; CHECK-LABEL: 'fun16'
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = add i128 %val1, 123
- %res = add i128 %val1, 123
- store i128 %res, ptr %dst, align 8
- ret void
-}
>From 17f5494e88d0614186c66ee73f5dbef0c7003ea1 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Thu, 18 Jan 2024 11:16:57 -0600
Subject: [PATCH 3/3] Don't remove the load-to-trunc.ll test which is for z10
---
.../CostModel/SystemZ/load-to-trunc.ll | 27 +++++++++++++++++++
1 file changed, 27 insertions(+)
create mode 100644 llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll
diff --git a/llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll b/llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll
new file mode 100644
index 000000000000000..cd6af575ea9ec3d
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; Check that the memory cost model gives a cost of 1 for a load of an unusually
+; sized integer followed by a trunc to a register sized integer, rather than
+; the expanded cost it would get without the trunc. This target does not
+; currently perform the expansion in the cost modelling.
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=systemz-unknown < %s | FileCheck %s --check-prefix=CHECK
+
+; Check that cost is 1 for unusual load to register sized load.
+define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
+; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
+;
+ %out = load i128, ptr %ptr
+ %trunc = trunc i128 %out to i32
+ ret i32 %trunc
+}
+
+define i128 @loadUnusualInteger(ptr %ptr) {
+; CHECK-LABEL: 'loadUnusualInteger'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
+;
+ %out = load i128, ptr %ptr
+ ret i128 %out
+}
More information about the llvm-commits
mailing list