[llvm] [Analysis] Add Intrinsic::clmul case to the cost calculations in getIntrinsicInstrCost / getTypeBasedIntrinsicInstrCost (PR #176552)

Sun Feb 1 04:17:54 PST 2026

https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/176552

>From 490041b0e6b039e209d7fbd9148074727fabb973 Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Sat, 17 Jan 2026 11:48:52 +0100
Subject: [PATCH 01/11] [Analysis] Add Intrinsic::clmul case to the cost
 calculations in getIntrinsicInstrCost / getTypeBasedIntrinsicInstrCost

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 16 ++++++++++++++++
 llvm/test/Analysis/CostModel/AArch64/clmul.ll | 19 +++++++++++++++++++
 llvm/test/Analysis/CostModel/X86/clmul.ll     | 19 +++++++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/clmul.ll
 create mode 100644 llvm/test/Analysis/CostModel/X86/clmul.ll

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ef91c845ce9e7..a3bf08e708623 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,6 +2192,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
       return Cost;
     }
+    case Intrinsic::clmul: {
+      return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
+    }
     }
 
     // Assume that we need to scalarize this intrinsic.)
@@ -2682,6 +2685,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::scmp:
       ISD = ISD::SCMP;
       break;
+    case Intrinsic::clmul: {
+      EVT ETy = getTLI()->getValueType(DL, RetTy);
+      if (ETy.isSimple()) {
+        MVT VT = ETy.getSimpleVT();
+        if (getTLI()->isOperationLegalOrCustom(ISD::CLMUL, VT))
+          return TTI::TCC_Basic;
+      }
+      InstructionCost PerBitCost =
+          thisT()->getArithmeticInstrCost(Instruction::And, RetTy, CostKind) +
+          thisT()->getArithmeticInstrCost(Instruction::Mul, RetTy, CostKind) +
+          thisT()->getArithmeticInstrCost(Instruction::Xor, RetTy, CostKind);
+      return RetTy->getScalarSizeInBits() * PerBitCost;
+    }
     }
 
     auto *ST = dyn_cast<StructType>(RetTy);
diff --git a/llvm/test/Analysis/CostModel/AArch64/clmul.ll b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
new file mode 100644
index 0000000000000..bcf985fb5020d
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-- | FileCheck %s
+
+define void @clmul() {
+  ; CHECK-LABEL: 'clmul'
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 undef, i64 undef)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 undef, i32 undef)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 undef, i8 undef)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+  ;
+  %call_i64 = call i64 @llvm.clmul.i64(i64 undef, i64 undef)
+  %call_i32 = call i32 @llvm.clmul.i32(i32 undef, i32 undef)
+  %call_i8 = call i8 @llvm.clmul.i8(i8 undef, i8 undef)
+  ret void
+}
+
+declare i64 @llvm.clmul.i64(i64, i64)
+declare i32 @llvm.clmul.i32(i32, i32)
+declare i8 @llvm.clmul.i8(i8, i8)
diff --git a/llvm/test/Analysis/CostModel/X86/clmul.ll b/llvm/test/Analysis/CostModel/X86/clmul.ll
new file mode 100644
index 0000000000000..1e5b857925307
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/clmul.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-unknown-linux-gnu -mattr=+pclmul | FileCheck %s
+
+define void @clmul() {
+  ; CHECK-LABEL: 'clmul'
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 undef, i64 undef)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 undef, i32 undef)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 undef, i8 undef)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+  ;
+  %call_i64 = call i64 @llvm.clmul.i64(i64 undef, i64 undef)
+  %call_i32 = call i32 @llvm.clmul.i32(i32 undef, i32 undef)
+  %call_i8 = call i8 @llvm.clmul.i8(i8 undef, i8 undef)
+  ret void
+}
+
+declare i64 @llvm.clmul.i64(i64, i64)
+declare i32 @llvm.clmul.i32(i32, i32)
+declare i8 @llvm.clmul.i8(i8, i8)

>From 4a4fa0ee79c4a2c8a3e0b74fe8edfdfb904aee37 Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Sat, 17 Jan 2026 18:44:15 +0100
Subject: [PATCH 02/11] Refactor CLMUL cost to use ISD::CLMUL and fix undef
 usage in tests

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 55 +++++++++----------
 llvm/test/Analysis/CostModel/AArch64/clmul.ll | 14 ++---
 llvm/test/Analysis/CostModel/X86/clmul.ll     | 14 ++---
 3 files changed, 40 insertions(+), 43 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a3bf08e708623..24035f989f9fe 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -195,16 +195,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
   static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
     switch (M) {
-      case TTI::MIM_Unindexed:
-        return ISD::UNINDEXED;
-      case TTI::MIM_PreInc:
-        return ISD::PRE_INC;
-      case TTI::MIM_PreDec:
-        return ISD::PRE_DEC;
-      case TTI::MIM_PostInc:
-        return ISD::POST_INC;
-      case TTI::MIM_PostDec:
-        return ISD::POST_DEC;
+    case TTI::MIM_Unindexed:
+      return ISD::UNINDEXED;
+    case TTI::MIM_PreInc:
+      return ISD::PRE_INC;
+    case TTI::MIM_PreDec:
+      return ISD::PRE_DEC;
+    case TTI::MIM_PostInc:
+      return ISD::POST_INC;
+    case TTI::MIM_PostDec:
+      return ISD::POST_DEC;
     }
     llvm_unreachable("Unexpected MemIndexedMode");
   }
@@ -1251,7 +1251,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         EVT ExtVT = EVT::getEVT(Dst);
         EVT LoadVT = EVT::getEVT(Src);
         unsigned LType =
-          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
+            ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
         if (DstLT.first == SrcLT.first &&
             TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
           return 0;
@@ -2048,10 +2048,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
           thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
       Cost += thisT()->getArithmeticInstrCost(
           BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
-          {OpInfoZ.Kind, TTI::OP_None});
+                                              {OpInfoZ.Kind, TTI::OP_None});
       Cost += thisT()->getArithmeticInstrCost(
           BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
-          {OpInfoZ.Kind, TTI::OP_None});
+                                              {OpInfoZ.Kind, TTI::OP_None});
       // Non-constant shift amounts requires a modulo. If the typesize is a
       // power-2 then this will be converted to an and, otherwise it will use a
       // urem.
@@ -2066,7 +2066,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         Type *CondTy = RetTy->getWithNewBitWidth(1);
         Cost +=
             thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-                                        CmpInst::ICMP_EQ, CostKind);
+                                            CmpInst::ICMP_EQ, CostKind);
         Cost +=
             thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                         CmpInst::ICMP_EQ, CostKind);
@@ -2685,19 +2685,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::scmp:
       ISD = ISD::SCMP;
       break;
-    case Intrinsic::clmul: {
-      EVT ETy = getTLI()->getValueType(DL, RetTy);
-      if (ETy.isSimple()) {
-        MVT VT = ETy.getSimpleVT();
-        if (getTLI()->isOperationLegalOrCustom(ISD::CLMUL, VT))
-          return TTI::TCC_Basic;
-      }
-      InstructionCost PerBitCost =
-          thisT()->getArithmeticInstrCost(Instruction::And, RetTy, CostKind) +
-          thisT()->getArithmeticInstrCost(Instruction::Mul, RetTy, CostKind) +
-          thisT()->getArithmeticInstrCost(Instruction::Xor, RetTy, CostKind);
-      return RetTy->getScalarSizeInBits() * PerBitCost;
-    }
+    case Intrinsic::clmul:
+      ISD = ISD::CLMUL;
+      break;
     }
 
     auto *ST = dyn_cast<StructType>(RetTy);
@@ -2738,9 +2728,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     }
     case Intrinsic::experimental_constrained_fmuladd: {
       IntrinsicCostAttributes FMulAttrs(
-        Intrinsic::experimental_constrained_fmul, RetTy, Tys);
+          Intrinsic::experimental_constrained_fmul, RetTy, Tys);
       IntrinsicCostAttributes FAddAttrs(
-        Intrinsic::experimental_constrained_fadd, RetTy, Tys);
+          Intrinsic::experimental_constrained_fadd, RetTy, Tys);
       return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
              thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
     }
@@ -3013,6 +3003,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       }
       break;
     }
+    case Intrinsic::clmul: {
+      InstructionCost PerBitCost =
+          thisT()->getArithmeticInstrCost(Instruction::And, RetTy, CostKind) +
+          thisT()->getArithmeticInstrCost(Instruction::Mul, RetTy, CostKind) +
+          thisT()->getArithmeticInstrCost(Instruction::Xor, RetTy, CostKind);
+      return RetTy->getScalarSizeInBits() * PerBitCost;
+    }
     default:
       break;
     }
diff --git a/llvm/test/Analysis/CostModel/AArch64/clmul.ll b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
index bcf985fb5020d..08ac2e8b0771a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/clmul.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-- | FileCheck %s
 
-define void @clmul() {
+define void @clmul(i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
   ; CHECK-LABEL: 'clmul'
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 undef, i64 undef)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 undef, i32 undef)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 undef, i8 undef)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
   ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
   ;
-  %call_i64 = call i64 @llvm.clmul.i64(i64 undef, i64 undef)
-  %call_i32 = call i32 @llvm.clmul.i32(i32 undef, i32 undef)
-  %call_i8 = call i8 @llvm.clmul.i8(i8 undef, i8 undef)
+  %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+  %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
+  %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
   ret void
 }
 
diff --git a/llvm/test/Analysis/CostModel/X86/clmul.ll b/llvm/test/Analysis/CostModel/X86/clmul.ll
index 1e5b857925307..a6c399ac2181f 100644
--- a/llvm/test/Analysis/CostModel/X86/clmul.ll
+++ b/llvm/test/Analysis/CostModel/X86/clmul.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-unknown-linux-gnu -mattr=+pclmul | FileCheck %s
 
-define void @clmul() {
+define void @clmul(i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
   ; CHECK-LABEL: 'clmul'
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 undef, i64 undef)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 undef, i32 undef)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 undef, i8 undef)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
+  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
   ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
   ;
-  %call_i64 = call i64 @llvm.clmul.i64(i64 undef, i64 undef)
-  %call_i32 = call i32 @llvm.clmul.i32(i32 undef, i32 undef)
-  %call_i8 = call i8 @llvm.clmul.i8(i8 undef, i8 undef)
+  %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+  %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
+  %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
   ret void
 }
 

>From d908e24c3a939709617dcf25bdb9722f50032d1c Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Sun, 18 Jan 2026 16:55:59 +0100
Subject: [PATCH 03/11] Run git-clang-format

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 24035f989f9fe..e75a05c1df92c 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2046,11 +2046,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
           thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
       Cost +=
           thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
-      Cost += thisT()->getArithmeticInstrCost(
-          BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
+      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
+                                              CostKind, OpInfoX,
                                               {OpInfoZ.Kind, TTI::OP_None});
-      Cost += thisT()->getArithmeticInstrCost(
-          BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
+      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
+                                              CostKind, OpInfoY,
                                               {OpInfoZ.Kind, TTI::OP_None});
       // Non-constant shift amounts requires a modulo. If the typesize is a
       // power-2 then this will be converted to an and, otherwise it will use a
@@ -2064,8 +2064,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       // For non-rotates (X != Y) we must add shift-by-zero handling costs.
       if (X != Y) {
         Type *CondTy = RetTy->getWithNewBitWidth(1);
-        Cost +=
-            thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+        Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                             CmpInst::ICMP_EQ, CostKind);
         Cost +=
             thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,

>From 761782bb44efe9b3fe716014e9cffb259c6f05eb Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Sun, 18 Jan 2026 17:09:12 +0100
Subject: [PATCH 04/11] Add notes to keep cost calculation consistent with
 expansion code

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h         | 2 ++
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index e75a05c1df92c..823b5784bfbb7 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -3003,6 +3003,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       break;
     }
     case Intrinsic::clmul: {
+      // This cost model must match the expansion in
+      // TargetLowering::expandCLMUL.
       InstructionCost PerBitCost =
           thisT()->getArithmeticInstrCost(Instruction::And, RetTy, CostKind) +
           thisT()->getArithmeticInstrCost(Instruction::Mul, RetTy, CostKind) +
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0ec783f1f1d04..e64fc3823d127 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8416,6 +8416,9 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
 
   switch (Opcode) {
   case ISD::CLMUL: {
+    // NOTE: If you change this expansion, please update the cost model
+    // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
+    // Intrinsic::clmul.
     SDValue Res = DAG.getConstant(0, DL, VT);
     for (unsigned I = 0; I < BW; ++I) {
       SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);

>From d5ce52b4ed014ecfbf46456d0d61b39853db1c4e Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Sun, 18 Jan 2026 17:27:20 +0100
Subject: [PATCH 05/11] Fix new tests formatting + update x86 expectations

---
 llvm/test/Analysis/CostModel/AArch64/clmul.ll | 12 ++++++------
 llvm/test/Analysis/CostModel/X86/clmul.ll     | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/AArch64/clmul.ll b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
index 08ac2e8b0771a..a7c5b4e0fe39a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/clmul.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
@@ -2,12 +2,12 @@
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-- | FileCheck %s
 
 define void @clmul(i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
-  ; CHECK-LABEL: 'clmul'
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-  ;
+; CHECK-LABEL: 'clmul'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
   %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
   %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
diff --git a/llvm/test/Analysis/CostModel/X86/clmul.ll b/llvm/test/Analysis/CostModel/X86/clmul.ll
index a6c399ac2181f..706a19d705606 100644
--- a/llvm/test/Analysis/CostModel/X86/clmul.ll
+++ b/llvm/test/Analysis/CostModel/X86/clmul.ll
@@ -2,12 +2,12 @@
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-unknown-linux-gnu -mattr=+pclmul | FileCheck %s
 
 define void @clmul(i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
-  ; CHECK-LABEL: 'clmul'
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
-  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-  ;
+; CHECK-LABEL: 'clmul'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
   %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
   %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)

>From c989cba3212e7062e04dd3aef8bfad39b224cf96 Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Sun, 18 Jan 2026 19:15:51 +0100
Subject: [PATCH 06/11] Drop unrelated formatting + fix new comment

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 45 ++++++++++++------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 823b5784bfbb7..7707612747960 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -195,16 +195,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
   static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
     switch (M) {
-    case TTI::MIM_Unindexed:
-      return ISD::UNINDEXED;
-    case TTI::MIM_PreInc:
-      return ISD::PRE_INC;
-    case TTI::MIM_PreDec:
-      return ISD::PRE_DEC;
-    case TTI::MIM_PostInc:
-      return ISD::POST_INC;
-    case TTI::MIM_PostDec:
-      return ISD::POST_DEC;
+      case TTI::MIM_Unindexed:
+        return ISD::UNINDEXED;
+      case TTI::MIM_PreInc:
+        return ISD::PRE_INC;
+      case TTI::MIM_PreDec:
+        return ISD::PRE_DEC;
+      case TTI::MIM_PostInc:
+        return ISD::POST_INC;
+      case TTI::MIM_PostDec:
+        return ISD::POST_DEC;
     }
     llvm_unreachable("Unexpected MemIndexedMode");
   }
@@ -1251,7 +1251,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         EVT ExtVT = EVT::getEVT(Dst);
         EVT LoadVT = EVT::getEVT(Src);
         unsigned LType =
-            ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
+          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
         if (DstLT.first == SrcLT.first &&
             TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
           return 0;
@@ -2046,12 +2046,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
           thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
       Cost +=
           thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
-      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
-                                              CostKind, OpInfoX,
-                                              {OpInfoZ.Kind, TTI::OP_None});
-      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
-                                              CostKind, OpInfoY,
-                                              {OpInfoZ.Kind, TTI::OP_None});
+      Cost += thisT()->getArithmeticInstrCost(
+          BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
+          {OpInfoZ.Kind, TTI::OP_None});
+      Cost += thisT()->getArithmeticInstrCost(
+          BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
+          {OpInfoZ.Kind, TTI::OP_None});
       // Non-constant shift amounts requires a modulo. If the typesize is a
       // power-2 then this will be converted to an and, otherwise it will use a
       // urem.
@@ -2064,8 +2064,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       // For non-rotates (X != Y) we must add shift-by-zero handling costs.
       if (X != Y) {
         Type *CondTy = RetTy->getWithNewBitWidth(1);
-        Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-                                            CmpInst::ICMP_EQ, CostKind);
+        Cost +=
+            thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+                                        CmpInst::ICMP_EQ, CostKind);
         Cost +=
             thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                         CmpInst::ICMP_EQ, CostKind);
@@ -2727,9 +2728,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     }
     case Intrinsic::experimental_constrained_fmuladd: {
       IntrinsicCostAttributes FMulAttrs(
-          Intrinsic::experimental_constrained_fmul, RetTy, Tys);
+        Intrinsic::experimental_constrained_fmul, RetTy, Tys);
       IntrinsicCostAttributes FAddAttrs(
-          Intrinsic::experimental_constrained_fadd, RetTy, Tys);
+        Intrinsic::experimental_constrained_fadd, RetTy, Tys);
       return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
              thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
     }
@@ -3003,7 +3004,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       break;
     }
     case Intrinsic::clmul: {
-      // This cost model must match the expansion in
+      // This cost model should match the expansion in
       // TargetLowering::expandCLMUL.
       InstructionCost PerBitCost =
           thisT()->getArithmeticInstrCost(Instruction::And, RetTy, CostKind) +

>From fa4f1d1f0fe92cac8cc41c588e6631f8844d7588 Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Sat, 24 Jan 2026 14:54:53 +0100
Subject: [PATCH 07/11] Adding i128 test case + no-pclmul + remove redundant
 declare

---
 llvm/test/Analysis/CostModel/AArch64/clmul.ll | 10 +++----
 llvm/test/Analysis/CostModel/X86/clmul.ll     | 28 ++++++++++++-------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/AArch64/clmul.ll b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
index a7c5b4e0fe39a..1a177cbb58f1d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/clmul.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
@@ -1,19 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-- | FileCheck %s
 
-define void @clmul(i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
+define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
 ; CHECK-LABEL: 'clmul'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
   %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
   %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
   %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
   ret void
-}
-
-declare i64 @llvm.clmul.i64(i64, i64)
-declare i32 @llvm.clmul.i32(i32, i32)
-declare i8 @llvm.clmul.i8(i8, i8)
+}
\ No newline at end of file
diff --git a/llvm/test/Analysis/CostModel/X86/clmul.ll b/llvm/test/Analysis/CostModel/X86/clmul.ll
index 706a19d705606..d76f7e28a347e 100644
--- a/llvm/test/Analysis/CostModel/X86/clmul.ll
+++ b/llvm/test/Analysis/CostModel/X86/clmul.ll
@@ -1,19 +1,27 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-unknown-linux-gnu -mattr=+pclmul | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-unknown-linux-gnu -mattr=+pclmul | FileCheck %s --check-prefix=PCLMUL
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-unknown-linux-gnu -mattr=-pclmul | FileCheck %s --check-prefix=NO-PCLMUL
 
-define void @clmul(i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
-; CHECK-LABEL: 'clmul'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+
+define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
+; PCLMUL-LABEL: 'clmul'
+; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
+; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
+; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
+; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-PCLMUL-LABEL: 'clmul'
+; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
+; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
+; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
+; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
   %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
   %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
   %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
   ret void
 }
 
-declare i64 @llvm.clmul.i64(i64, i64)
-declare i32 @llvm.clmul.i32(i32, i32)
-declare i8 @llvm.clmul.i8(i8, i8)

>From fbc8af216b0cae34ec3bf0d0b05a0b2a0a7b8904 Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Fri, 30 Jan 2026 16:12:12 +0100
Subject: [PATCH 08/11] Fixing end of file newlines + fix i128 cost test on x86

---
 llvm/test/Analysis/CostModel/AArch64/clmul.ll | 2 +-
 llvm/test/Analysis/CostModel/X86/clmul.ll     | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/AArch64/clmul.ll b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
index 1a177cbb58f1d..3e5fa097169e1 100644
--- a/llvm/test/Analysis/CostModel/AArch64/clmul.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/clmul.ll
@@ -14,4 +14,4 @@ define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b3
   %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
   %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
   ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/Analysis/CostModel/X86/clmul.ll b/llvm/test/Analysis/CostModel/X86/clmul.ll
index d76f7e28a347e..99ee2b81f81c7 100644
--- a/llvm/test/Analysis/CostModel/X86/clmul.ll
+++ b/llvm/test/Analysis/CostModel/X86/clmul.ll
@@ -5,7 +5,7 @@
 
 define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) {
 ; PCLMUL-LABEL: 'clmul'
-; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
+; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
 ; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
 ; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
 ; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
@@ -24,4 +24,3 @@ define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b3
   %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
   ret void
 }
-

>From f95363a0ed5c4ade139e979845c0a4e6c28bfc1d Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Fri, 30 Jan 2026 18:12:49 +0100
Subject: [PATCH 09/11] Fixing i128 test for NO-PCLMUL on x86

---
 llvm/test/Analysis/CostModel/X86/clmul.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Analysis/CostModel/X86/clmul.ll b/llvm/test/Analysis/CostModel/X86/clmul.ll
index 99ee2b81f81c7..1877fca344027 100644
--- a/llvm/test/Analysis/CostModel/X86/clmul.ll
+++ b/llvm/test/Analysis/CostModel/X86/clmul.ll
@@ -12,7 +12,7 @@ define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b3
 ; PCLMUL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; NO-PCLMUL-LABEL: 'clmul'
-; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
+; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
 ; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
 ; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
 ; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)

>From 65dc79154b947420f54a44ecdb934964a0a94b79 Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Fri, 30 Jan 2026 18:26:05 +0100
Subject: [PATCH 10/11] Fixing NO-PCLMUL test on x86

---
 llvm/test/Analysis/CostModel/X86/clmul.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/X86/clmul.ll b/llvm/test/Analysis/CostModel/X86/clmul.ll
index 1877fca344027..53f852093cb4d 100644
--- a/llvm/test/Analysis/CostModel/X86/clmul.ll
+++ b/llvm/test/Analysis/CostModel/X86/clmul.ll
@@ -13,9 +13,9 @@ define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b3
 ;
 ; NO-PCLMUL-LABEL: 'clmul'
 ; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)
-; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
+; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64)
 ; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32)
-; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
+; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8)
 ; NO-PCLMUL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128)

>From badac1cca71709a95171b117512641a8c956ee86 Mon Sep 17 00:00:00 2001
From: Ni Qiang <ni.qiang.pro at gmail.com>
Date: Sat, 31 Jan 2026 17:19:36 +0100
Subject: [PATCH 11/11] Removing potentially unnecessary special case for clmul
 in getIntrinsicInstrCost

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 83d3d8e89867a..21afcbefdf719 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2212,9 +2212,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
       return Cost;
     }
-    case Intrinsic::clmul: {
-      return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
-    }
     }
 
     // Assume that we need to scalarize this intrinsic.)