[llvm] [IR] Add llvm `clmul` intrinsic (PR #140301)
Oscar Smith via llvm-commits
llvm-commits at lists.llvm.org
Sun May 18 06:13:48 PDT 2025
https://github.com/oscardssmith updated https://github.com/llvm/llvm-project/pull/140301
>From c74bfec5b0224309a203d7c9636c8ff2b2c4318a Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Fri, 16 May 2025 12:15:08 -0400
Subject: [PATCH 1/2] add clmul docs
---
llvm/docs/LangRef.rst | 48 +++++++++++++++++++
llvm/include/llvm/CodeGen/ISDOpcodes.h | 3 ++
llvm/include/llvm/IR/Intrinsics.td | 8 ++++
llvm/lib/CodeGen/IntrinsicLowering.cpp | 23 +++++++++
.../SelectionDAG/SelectionDAGBuilder.cpp | 6 +++
.../SelectionDAG/SelectionDAGDumper.cpp | 1 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +
7 files changed, 91 insertions(+)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 343ca743c74f8..1444654a248e2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18072,6 +18072,54 @@ Example:
%r = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) ; %r = i8: 225 (0b11100001)
%r = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) ; %r = i8: 255 (0b11111111)
+.. clmul:
+
+'``clmul.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.clmul``
+on any integer bit width or vectors of integers.
+
+::
+
+ declare i16 @llvm.clmul.i16(i16 %a, i16 %b)
+ declare i32 @llvm.clmul.i32(i32 %a, i32 %b)
+ declare i64 @llvm.clmul.i64(i64 %a, i64 %b)
+ declare <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+Overview
+"""""""""
+
+The '``llvm.clmul``' family of intrinsics functions perform carryless multiplication
+(also known as xor multiplication) on the 2 arguments.
+
+Arguments
+""""""""""
+
+The arguments (%a and %b) and the result may be of integer types of any bit
+width, but they must have the same bit width. ``%a`` and ``%b`` are the two
+values that will undergo carryless multiplication.
+
+Semantics:
+""""""""""
+
+The ‘llvm.clmul’ intrinsic computes carryless multiply of ``%a`` and ``%b``, which is the result
+of applying the standard multiplication algorithm if you replace all of the aditions with exclusive ors.
+The vector intrinsics, such as llvm.clmul.v4i32, operate on a per-element basis and the element order is not affected.
+
+Examples
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call i4 @llvm.clmul.i4(i4 1, i4 2) ; %res = 2
+ %res = call i4 @llvm.clmul.i4(i4 5, i4 6) ; %res = 14
+ %res = call i4 @llvm.clmul.i4(i4 -4, i4 2) ; %res = -8
+ %res = call i4 @llvm.clmul.i4(i4 -4, i4 -5) ; %res = -12
+
Arithmetic with Overflow Intrinsics
-----------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 9f66402e4c820..fc3b3b26cbe5e 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -751,6 +751,9 @@ enum NodeType {
ROTR,
FSHL,
FSHR,
+
+ /// Carryless multiplication operator
+ CLMUL,
/// Byte Swap and Counting operators.
BSWAP,
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e1a135a5ad48e..1857829910340 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1431,6 +1431,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+ def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
}
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn,
@@ -2103,6 +2105,12 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
+ def int_vp_clmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_sadd_sat : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 1518ead7698be..d66544ee87ea4 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -199,6 +199,25 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
return LowerCTPOP(Context, V, IP);
}
+/// Emit the code to lower clmul of V1, V2 before the specified instruction IP.
+static Value *LowerCLMUL(LLVMContext &Context, Value *V1, Value *V2, Instruction *IP) {
+
+ IRBuilder<> Builder(IP);
+
+ unsigned BitSize = V1->getType()->getPrimitiveSizeInBits();
+ Value *Res = ConstantInt::get(V1->getType(), 0);
+ Value *Zero = ConstantInt::get(V1->getType(), 0);
+ Value *One = ConstantInt::get(V1->getType(), 1);
+ for (unsigned I = 1; I < BitSize; I ++) {
+ Value *LowBit = Builder.CreateAnd(V1, One, "clmul.isodd");
+ Value *Pred = Builder.CreateSelect(LowBit, V2, Zero, "clmul.V2_or_zero");
+ Res = Builder.CreateXor(Res, Pred, "clmul.Res");
+ V1 = Builder.CreateLShr(V1, One, "clmul.V1");
+ V2 = Builder.CreateShl(V2, One, "clmul.V2");
+ }
+ return LowerCTPOP(Context, Res, IP);
+}
+
static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
const char *Dname,
const char *LDname) {
@@ -262,6 +281,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
break;
+ case Intrinsic::clmul:
+ CI->replaceAllUsesWith(LowerCLMUL(Context, CI->getArgOperand(0), CI->getArgOperand(1), CI));
+ break;
+
case Intrinsic::cttz: {
// cttz(x) -> ctpop(~X & (X-1))
Value *Src = CI->getArgOperand(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3ebd3a4b88097..a4e350702ebc8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7188,6 +7188,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
return;
}
+ case Intrinsic::clmul: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::CLMUL, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 803894e298dd5..4fba332f806fd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -298,6 +298,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ROTR: return "rotr";
case ISD::FSHL: return "fshl";
case ISD::FSHR: return "fshr";
+ case ISD::CLMUL: return "clmul";
case ISD::FADD: return "fadd";
case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5e761fccc815a..470685b9c68bb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10347,6 +10347,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
+ case Intrinsic::clmul:
case Intrinsic::riscv_clmul:
return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
@@ -14283,6 +14284,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
+ case Intrinsic::clmul:
case Intrinsic::riscv_clmul: {
if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
return;
>From 1717aac5530bb180fd180801c0848bfdde5c25a1 Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Sun, 18 May 2025 09:13:34 -0400
Subject: [PATCH 2/2] --amend
---
llvm/docs/LangRef.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 1444654a248e2..fc1daa2f2adbd 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18093,7 +18093,7 @@ on any integer bit width or vectors of integers.
Overview
"""""""""
-The '``llvm.clmul``' family of intrinsics functions perform carryless multiplication
+The '``llvm.clmul``' family of intrinsic functions performs carryless multiplication
(also known as xor multiplication) on the 2 arguments.
Arguments
More information about the llvm-commits
mailing list