[llvm] UMULH example (PR #161224)

Mon Sep 29 09:24:13 PDT 2025

https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/161224

None

>From 29db710b583cebb82756389d1142ce722f52908a Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 29 Sep 2025 17:22:56 +0100
Subject: [PATCH] UMULH test

---
 llvm/lib/Transforms/Vectorize/VPlan.h             |  2 ++
 llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp   |  1 +
 llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h |  6 ++++++
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp    | 15 +++++++++++++++
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 15 +++++++++++++++
 5 files changed, 39 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0822511150e9e..6b50e286d5f1b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1063,6 +1063,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
     ResumeForEpilogue,
     /// Returns the value for vscale.
     VScale,
+    /// Unsigned multiply-high: upper half of the double-width product.
+    UMulh,
   };
 
   /// Returns true if this VPInstruction generates scalar values for all lanes.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 07bfe7a896d86..d083cc6a6d102 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -75,6 +75,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
   case Instruction::Freeze:
   case VPInstruction::ReductionStartVector:
   case VPInstruction::ResumeForEpilogue:
+  case VPInstruction::UMulh:
     return inferScalarType(R->getOperand(0));
   case Instruction::Select: {
     Type *ResTy = inferScalarType(R->getOperand(1));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 555efea1ea840..3ab6429218f91 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -449,6 +449,12 @@ inline AllRecipe_match<Instruction::Mul, Op0_t, Op1_t> m_Mul(const Op0_t &Op0,
   return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
 }
 
+template <typename LHS_t, typename RHS_t>
+inline AllRecipe_match<Instruction::LShr, LHS_t, RHS_t>
+m_LShr(const LHS_t &Op0, const RHS_t &Op1) {
+  return m_Binary<Instruction::LShr, LHS_t, RHS_t>(Op0, Op1);
+}
+
 template <typename Op0_t, typename Op1_t>
 inline AllRecipe_commutative_match<Instruction::Mul, Op0_t, Op1_t>
 m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b5e30cb1fa655..53f97f232d11e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -510,6 +510,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
   case Instruction::ICmp:
   case Instruction::FCmp:
   case Instruction::Store:
+  case VPInstruction::UMulh:
   case VPInstruction::BranchOnCount:
   case VPInstruction::ComputeReductionResult:
   case VPInstruction::FirstOrderRecurrenceSplice:
@@ -892,6 +893,17 @@ Value *VPInstruction::generate(VPTransformState &State) {
     Value *B = State.get(getOperand(1));
     return Builder.CreateLogicalAnd(A, B, Name);
   }
+  case VPInstruction::UMulh: {
+    // High half of zext(A) * zext(B); zexts are sequenced for stable IR order.
+    Value *A = State.get(getOperand(0));
+    Value *B = State.get(getOperand(1));
+    unsigned Bits = A->getType()->getScalarSizeInBits();
+    Type *DblTy = A->getType()->getWithNewBitWidth(2 * Bits);
+    Value *ExtA = Builder.CreateZExt(A, DblTy);
+    Value *ExtB = Builder.CreateZExt(B, DblTy);
+    Value *Hi = Builder.CreateLShr(Builder.CreateMul(ExtA, ExtB, Name), Bits);
+    return Builder.CreateTrunc(Hi, A->getType());
+  }
   case VPInstruction::PtrAdd: {
     assert(vputils::onlyFirstLaneUsed(this) &&
            "can only generate first lane for PtrAdd");
@@ -1400,6 +1412,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::ResumeForEpilogue:
     O << "resume-for-epilogue";
     break;
+  case VPInstruction::UMulh:
+    O << "umulh";
+    break;
   default:
     O << Instruction::getOpcodeName(getOpcode());
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 5252e1f928294..9302ea68c95e8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3630,6 +3630,29 @@ void VPlanTransforms::convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
       if (auto *Red = dyn_cast<VPReductionRecipe>(&R))
         tryToCreateAbstractReductionRecipe(Red, Ctx, Range);
+
+      // Fold trunc(lshr(mul(zext(A), zext(B)), Bits)) into umulh(A, B), where
+      // Bits is the scalar width shared by A, B and the truncated result.
+      if (auto *VP = dyn_cast<VPWidenCastRecipe>(&R)) {
+        VPValue *A, *B;
+        Type *Ty = Ctx.Types.inferScalarType(VP);
+        unsigned Bits = Ty->getScalarSizeInBits();
+        if (match(VP, m_Trunc(m_LShr(
+                          m_Mul(m_ZExt(m_VPValue(A)), m_ZExt(m_VPValue(B))),
+                          m_SpecificInt(Bits)))) &&
+            Ctx.Types.inferScalarType(A) == Ty &&
+            Ctx.Types.inferScalarType(B) == Ty &&
+            // A multiply narrower than 2 * Bits wraps and loses part of the
+            // high half, so only fold when the full product is representable.
+            Ctx.Types.inferScalarType(VP->getOperand(0))
+                    ->getScalarSizeInBits() >= 2 * Bits) {
+          dbgs() << "UMulh Matched\n";
+          auto Mulh = new VPInstruction(VPInstruction::UMulh, {A, B});
+          Mulh->insertBefore(*VPBB, R.getIterator());
+          // VP stays in the block but has no users after this replacement.
+          VP->replaceAllUsesWith(Mulh);
+        }
+      }
     }
   }
 }