[llvm] [CostModel] Handle all cost kinds in getCmpSelInstrCost (PR #148233)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 11 04:48:47 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

Currently we always produce a cost of 1 for all CostKinds that are not RecipThroughput, which can underestimate the cost if the type has a higher legalization cost (like larger vectors). This relaxes it to cover all cost kinds.

---

Patch is 333.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148233.diff


24 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+1-2) 
- (modified) llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll (+48-48) 
- (modified) llvm/test/Analysis/CostModel/AArch64/cmp.ll (+4-4) 
- (modified) llvm/test/Analysis/CostModel/AArch64/fcmp.ll (+128-128) 
- (modified) llvm/test/Analysis/CostModel/AArch64/fshl.ll (+3-3) 
- (modified) llvm/test/Analysis/CostModel/AArch64/fshr.ll (+3-3) 
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll (+6-6) 
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll (+69-69) 
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+6-6) 
- (modified) llvm/test/Analysis/CostModel/AArch64/vector-select.ll (+34-34) 
- (modified) llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll (+2-2) 
- (modified) llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll (+2-2) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-overflow.ll (+90-90) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-ssat.ll (+50-50) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-usat.ll (+50-50) 
- (modified) llvm/test/Analysis/CostModel/ARM/cmps.ll (+23-23) 
- (modified) llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll (+3-3) 
- (modified) llvm/test/Analysis/CostModel/ARM/mve-abs.ll (+6-6) 
- (modified) llvm/test/Analysis/CostModel/ARM/mve-minmax.ll (+24-24) 
- (modified) llvm/test/Analysis/CostModel/ARM/select.ll (+52-52) 
- (modified) llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll (+19-23) 
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+171-186) 
- (modified) llvm/test/CodeGen/X86/test-shrink-bug.ll (+9-10) 
- (modified) llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll (+10-2) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index fefd36ec54ae2..bff9979493125 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1369,8 +1369,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
     assert(ISD && "Invalid opcode");
 
-    // TODO: Handle other cost kinds.
-    if (CostKind != TTI::TCK_RecipThroughput)
+    if (getTLI()->getValueType(DL, ValTy, true) == MVT::Other)
       return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                        Op1Info, Op2Info, I);
 
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
index 8b52b123d6d25..3fbd12ac8813c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
@@ -27,20 +27,20 @@ define i32 @sadd(i32 %arg) {
 ; CHECK-LABEL: 'sadd'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
@@ -90,20 +90,20 @@ define i32 @uadd(i32 %arg) {
 ; CHECK-LABEL: 'uadd'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
@@ -153,20 +153,20 @@ define i32 @ssub(i32 %arg) {
 ; CHECK-LABEL: 'ssub'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
@@ -216,20 +216,20 @@ define i32 @usub(i32 %arg) {
 ; CHECK-LABEL: 'usub'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
@@ -279,20 +279,20 @@ define i32 @smul(i32 %arg) {
 ; CHECK-LABEL: 'smul'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:8 Lat:8 SizeLat:8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:68 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:136 CodeSize:8 Lat:8 SizeLat:8 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:68 CodeSize:9 Lat:9 SizeLat:9 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:136 CodeSize:11 Lat:11 SizeLat:11 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:8 Lat:8 SizeLat:8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:8 Lat:8 SizeLat:8 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:8 Lat:8 SizeLat:8 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:9 Lat:9 SizeLat:9 for: %V8I32 = call { <8 x i32>, <8 x...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/148233


More information about the llvm-commits mailing list