[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jul 28 06:48:35 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100519
>From 411c9c8f9fff386807a4ff6317dbec8a3eb1cd1a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 ++++++++++---------
.../Analysis/CostModel/X86/arith-overflow.ll | 8 +--
2 files changed, 40 insertions(+), 35 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
ISD = ISD::USUBO;
break;
case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow: {
- Type *MulTy = RetTy->getContainedType(0);
- Type *OverflowTy = RetTy->getContainedType(1);
- unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
- Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
- bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
- unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
- TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
- InstructionCost Cost = 0;
- Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
- Cost +=
- thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
- Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
- CCH, CostKind);
- Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
- CostKind,
- {TTI::OK_AnyValue, TTI::OP_None},
- {TTI::OK_UniformConstantValue, TTI::OP_None});
-
- if (IsSigned)
- Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
- CostKind,
- {TTI::OK_AnyValue, TTI::OP_None},
- {TTI::OK_UniformConstantValue, TTI::OP_None});
-
- Cost += thisT()->getCmpSelInstrCost(
- BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
- return Cost;
- }
+ ISD = ISD::SMULO;
+ break;
+ case Intrinsic::umul_with_overflow:
+ ISD = ISD::UMULO;
+ break;
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat: {
if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
OverflowTy, Pred, CostKind);
return Cost;
}
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ Type *MulTy = RetTy->getContainedType(0);
+ Type *OverflowTy = RetTy->getContainedType(1);
+ unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+ Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+ bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+ unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+ TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+ InstructionCost Cost = 0;
+ Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
+ Cost +=
+ thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+ Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+ CCH, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(
+ Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+ {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+ if (IsSigned)
+ Cost += thisT()->getArithmeticInstrCost(
+ Instruction::AShr, MulTy, CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None},
+ {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+ Cost += thisT()->getCmpSelInstrCost(
+ BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+ return Cost;
+ }
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat: {
// Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index 963bb8a9d9fac..71bc6b5375c73 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1080,7 +1080,7 @@ define i32 @smul(i32 %arg) {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'smul'
@@ -1118,7 +1118,7 @@ define i32 @smul(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'smul'
@@ -1318,7 +1318,7 @@ define i32 @umul(i32 %arg) {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'umul'
@@ -1356,7 +1356,7 @@ define i32 @umul(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'umul'
More information about the llvm-branch-commits
mailing list