[llvm] [IR] Add llvm `clmul` intrinsic (PR #140301)
Oscar Smith via llvm-commits
llvm-commits at lists.llvm.org
Sat May 31 12:49:34 PDT 2025
https://github.com/oscardssmith updated https://github.com/llvm/llvm-project/pull/140301
>From 6566c30ed8015b31391c702270ba2319f8f41f2d Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Fri, 16 May 2025 12:15:08 -0400
Subject: [PATCH 01/11] add clmul docs
---
llvm/docs/LangRef.rst | 48 +++++++++++++++++++
llvm/include/llvm/CodeGen/ISDOpcodes.h | 3 ++
llvm/include/llvm/IR/Intrinsics.td | 8 ++++
llvm/lib/CodeGen/IntrinsicLowering.cpp | 23 +++++++++
.../SelectionDAG/SelectionDAGBuilder.cpp | 6 +++
.../SelectionDAG/SelectionDAGDumper.cpp | 1 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +
7 files changed, 91 insertions(+)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8c0a046d3a7e9..16458566ece51 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18084,6 +18084,54 @@ Example:
%r = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) ; %r = i8: 225 (0b11100001)
%r = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) ; %r = i8: 255 (0b11111111)
+.. _clmul:
+
+'``llvm.clmul.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.clmul``
+on any integer bit width or vectors of integers.
+
+::
+
+ declare i16 @llvm.clmul.i16(i16 %a, i16 %b)
+ declare i32 @llvm.clmul.i32(i32 %a, i32 %b)
+ declare i64 @llvm.clmul.i64(i64 %a, i64 %b)
+ declare <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+Overview
+"""""""""
+
+The '``llvm.clmul``' family of intrinsics functions perform carryless multiplication
+(also known as xor multiplication) on the 2 arguments.
+
+Arguments
+""""""""""
+
+The arguments (%a and %b) and the result may be of integer types of any bit
+width, but they must have the same bit width. ``%a`` and ``%b`` are the two
+values that will undergo carryless multiplication.
+
+Semantics:
+""""""""""
+
+The ‘llvm.clmul’ intrinsic computes carryless multiply of ``%a`` and ``%b``, which is the result
+of applying the standard multiplication algorithm if you replace all of the aditions with exclusive ors.
+The vector intrinsics, such as llvm.clmul.v4i32, operate on a per-element basis and the element order is not affected.
+
+Examples
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call i4 @llvm.clmul.i4(i4 1, i4 2) ; %res = 2
+ %res = call i4 @llvm.clmul.i4(i4 5, i4 6) ; %res = 14
+ %res = call i4 @llvm.clmul.i4(i4 -4, i4 2) ; %res = -8
+ %res = call i4 @llvm.clmul.i4(i4 -4, i4 -5) ; %res = 4
+
Arithmetic with Overflow Intrinsics
-----------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 47a1aec3da06a..47c2b9d71ff3c 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -760,6 +760,9 @@ enum NodeType {
ROTR,
FSHL,
FSHR,
+
+ /// Carryless multiplication operator
+ CLMUL,
/// Byte Swap and Counting operators.
BSWAP,
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d3899056bc240..653ec1c0bf076 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1426,6 +1426,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+ def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
}
let IntrProperties = [IntrNoMem, IntrSpeculatable,
@@ -2098,6 +2100,12 @@ let IntrProperties = [IntrNoMem] in {
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
+ def int_vp_clmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_sadd_sat : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 1518ead7698be..d66544ee87ea4 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -199,6 +199,25 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
return LowerCTPOP(Context, V, IP);
}
+/// Emit the code to lower clmul of V1, V2 before the specified instruction IP.
+static Value *LowerCLMUL(LLVMContext &Context, Value *V1, Value *V2, Instruction *IP) {
+
+ IRBuilder<> Builder(IP);
+
+ unsigned BitSize = V1->getType()->getPrimitiveSizeInBits();
+ Value *Res = ConstantInt::get(V1->getType(), 0);
+ Value *Zero = ConstantInt::get(V1->getType(), 0);
+ Value *One = ConstantInt::get(V1->getType(), 1);
+ for (unsigned I = 1; I < BitSize; I ++) {
+ Value *LowBit = Builder.CreateAnd(V1, One, "clmul.isodd");
+ Value *Pred = Builder.CreateSelect(LowBit, V2, Zero, "clmul.V2_or_zero");
+ Res = Builder.CreateXor(Res, Pred, "clmul.Res");
+ V1 = Builder.CreateLShr(V1, One, "clmul.V1");
+ V2 = Builder.CreateShl(V2, One, "clmul.V2");
+ }
+ return LowerCTPOP(Context, Res, IP);
+}
+
static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
const char *Dname,
const char *LDname) {
@@ -262,6 +281,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
break;
+ case Intrinsic::clmul:
+ CI->replaceAllUsesWith(LowerCLMUL(Context, CI->getArgOperand(0), CI->getArgOperand(1), CI));
+ break;
+
case Intrinsic::cttz: {
// cttz(x) -> ctpop(~X & (X-1))
Value *Src = CI->getArgOperand(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 77771ee56e828..c705f9c565bcf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7185,6 +7185,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
return;
}
+ case Intrinsic::clmul: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::CLMUL, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 539f583ea361f..c68e608a6cd97 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -299,6 +299,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ROTR: return "rotr";
case ISD::FSHL: return "fshl";
case ISD::FSHR: return "fshr";
+ case ISD::CLMUL: return "clmul";
case ISD::FADD: return "fadd";
case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6e8e4ac1c6a95..81e6661ef6987 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10420,6 +10420,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
+ case Intrinsic::clmul:
case Intrinsic::riscv_clmul:
return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
@@ -14367,6 +14368,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
+ case Intrinsic::clmul:
case Intrinsic::riscv_clmul: {
if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
return;
>From 91bf4f5f8a9e49e9399cc90056bdbe8a4b9d1a55 Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Sun, 18 May 2025 09:13:34 -0400
Subject: [PATCH 02/11] --amend
---
llvm/docs/LangRef.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 16458566ece51..514173a6d5618 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18105,7 +18105,7 @@ on any integer bit width or vectors of integers.
Overview
"""""""""
-The '``llvm.clmul``' family of intrinsics functions perform carryless multiplication
+The '``llvm.clmul``' family of intrinsic functions performs carryless multiplication
(also known as xor multiplication) on the 2 arguments.
Arguments
>From ebf8954912162bab19a1ec15f0b6bf4e62a77b0e Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Tue, 20 May 2025 00:54:25 -0400
Subject: [PATCH 03/11] teach selection dag about clmul legalization
---
llvm/include/llvm/CodeGen/TargetLowering.h | 5 +++
llvm/lib/CodeGen/IntrinsicLowering.cpp | 2 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +++
.../CodeGen/SelectionDAG/TargetLowering.cpp | 31 +++++++++++++++++++
llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 ++
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 1 +
6 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 9c453f51e129d..b5982c8fb76ae 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5409,6 +5409,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;
+ /// Expand carryless multiply.
+ /// \param N Node to expand
+ /// \returns The expansion if successful, SDValue() otherwise
+ SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const;
+
/// Expand rotations.
/// \param N Node to expand
/// \param AllowVectorOps expand vector rotate, this should only be performed
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index d66544ee87ea4..8e6020d1055e9 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -215,7 +215,7 @@ static Value *LowerCLMUL(LLVMContext &Context, Value *V1, Value *V2, Instruction
V1 = Builder.CreateLShr(V1, One, "clmul.V1");
V2 = Builder.CreateShl(V2, One, "clmul.V2");
}
- return LowerCTPOP(Context, Res, IP);
+ return Res;
}
static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 528c07cc5549d..8dee5d1d769b0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3907,6 +3907,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG))
Results.push_back(Expanded);
break;
+ case ISD::CLMUL:
+ if (SDValue Expanded = TLI.expandCLMUL(Node, DAG))
+ Results.push_back(Expanded);
+ break;
case ISD::ROTL:
case ISD::ROTR:
if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 22d0bc9914585..78efb60321755 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8138,6 +8138,37 @@ SDValue TargetLowering::expandFunnelShift(SDNode *Node,
return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
+SDValue TargetLowering::expandCLMUL(SDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue V1 = Node->getOperand(0);
+ SDValue V2 = Node->getOperand(1);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ (!isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::XOR, VT) ||
+ !isOperationLegalOrCustom(ISD::AND, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT))))
+ return SDValue();
+
+ SDValue Res = DAG.getConstant(0, DL, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ for (unsigned i = 0; i < NumBitsPerElt; ++i) {
+ SDValue LowBit = DAG.getNode(ISD::AND, DL, VT, V1, One);
+ SDValue Pred = DAG.getNode(ISD::SELECT, DL, VT, LowBit, V2, Zero);
+ Res = DAG.getNode(ISD::XOR, DL, VT, Res, Pred);
+ V1 = DAG.getNode(ISD::SRL, DL, VT, V1, One);
+ V2 = DAG.getNode(ISD::SHL, DL, VT, V2, One);
+ }
+ return Res;
+}
+
// TODO: Merge with expandFunnelShift.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
SelectionDAG &DAG) const {
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 935afaf9dd550..f59eb6b01d389 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -781,6 +781,9 @@ void TargetLoweringBase::initActions() {
// Absolute difference
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);
+ // Carryless multiply
+ setOperationAction(ISD::CLMUL, VT, Expand);
+
// Saturated trunc
setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Expand);
setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Expand);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 4353e94bdb1d0..fbd227922bd72 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -794,6 +794,7 @@ def : Sh3AddPat<SH3ADD>;
} // Predicates = [HasStdExtZba, IsRV64]
let Predicates = [HasStdExtZbcOrZbkc] in {
+def : PatGprGpr<clmul, CLMUL>;
def : PatGprGpr<riscv_clmul, CLMUL>;
def : PatGprGpr<riscv_clmulh, CLMULH>;
} // Predicates = [HasStdExtZbcOrZbkc]
>From 7dfad3451442e1f7b0ba8ea3f636b02c1248c8d4 Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Tue, 20 May 2025 01:29:35 -0400
Subject: [PATCH 04/11] fix
---
llvm/lib/CodeGen/IntrinsicLowering.cpp | 2 +-
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 8e6020d1055e9..a8c85469086b8 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -204,7 +204,7 @@ static Value *LowerCLMUL(LLVMContext &Context, Value *V1, Value *V2, Instruction
IRBuilder<> Builder(IP);
- unsigned BitSize = V1->getType()->getPrimitiveSizeInBits();
+ unsigned BitSize = V1->getType()->getScalarSizeInBits();
Value *Res = ConstantInt::get(V1->getType(), 0);
Value *Zero = ConstantInt::get(V1->getType(), 0);
Value *One = ConstantInt::get(V1->getType(), 1);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index fbd227922bd72..97fdbe85417fe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -54,6 +54,7 @@ def riscv_unzip : RVSDNode<"UNZIP", SDTIntUnaryOp>;
def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>;
// Scalar cryptography
+def clmul : SDNode<"CLMUL", SDTIntBinOp>;
def riscv_clmul : RVSDNode<"CLMUL", SDTIntBinOp>;
def riscv_clmulh : RVSDNode<"CLMULH", SDTIntBinOp>;
def riscv_clmulr : RVSDNode<"CLMULR", SDTIntBinOp>;
>From c4a6c55068207844a21773519b1d18f3c19dd07f Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Tue, 20 May 2025 14:23:34 -0400
Subject: [PATCH 05/11] fixes
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 16 ++++++++++++----
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 ++++--
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 2 +-
3 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 78efb60321755..7fd29afc314aa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8139,33 +8139,41 @@ SDValue TargetLowering::expandFunnelShift(SDNode *Node,
}
SDValue TargetLowering::expandCLMUL(SDNode *Node,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc DL(Node);
EVT VT = Node->getValueType(0);
SDValue V1 = Node->getOperand(0);
SDValue V2 = Node->getOperand(1);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+ EVT SetCCType =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// Only expand vector types if we have the appropriate vector bit operations.
- // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::XOR, VT) ||
!isOperationLegalOrCustom(ISD::AND, VT) ||
+ !isOperationLegalOrCustom(ISD::SELECT, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT))))
return SDValue();
SDValue Res = DAG.getConstant(0, DL, VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
- for (unsigned i = 0; i < NumBitsPerElt; ++i) {
+ for (unsigned i = 0; i < NumBitsPerElt-1; ++i) {
SDValue LowBit = DAG.getNode(ISD::AND, DL, VT, V1, One);
- SDValue Pred = DAG.getNode(ISD::SELECT, DL, VT, LowBit, V2, Zero);
+ SDValue LowBool = DAG.getSetCC(DL, SetCCType, LowBit, One, ISD::SETULT);
+ SDValue Pred = DAG.getNode(ISD::SELECT, DL, VT, LowBool, V2, Zero);
Res = DAG.getNode(ISD::XOR, DL, VT, Res, Pred);
V1 = DAG.getNode(ISD::SRL, DL, VT, V1, One);
V2 = DAG.getNode(ISD::SHL, DL, VT, V2, One);
}
+ // unroll last iteration to prevent dead nodes
+ SDValue LowBit = DAG.getNode(ISD::AND, DL, VT, V1, One);
+ SDValue LowBool = DAG.getSetCC(DL, SetCCType, LowBit, One, ISD::SETULT);
+ SDValue Pred = DAG.getNode(ISD::SELECT, DL, VT, LowBool, V2, Zero);
+ Res = DAG.getNode(ISD::XOR, DL, VT, Res, Pred);
return Res;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 81e6661ef6987..5bfec144d386d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -400,6 +400,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Legal);
}
+ if (Subtarget.hasStdExtZbc() || Subtarget.hasStdExtZbkc()) {
+ setOperationAction(ISD::CLMUL, XLenVT, Legal);
+ }
+
if (Subtarget.hasStdExtZbb() ||
(Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
if (Subtarget.is64Bit())
@@ -10420,7 +10424,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
- case Intrinsic::clmul:
case Intrinsic::riscv_clmul:
return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
@@ -14368,7 +14371,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
- case Intrinsic::clmul:
case Intrinsic::riscv_clmul: {
if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
return;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 97fdbe85417fe..2291dc9fb79eb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -54,7 +54,7 @@ def riscv_unzip : RVSDNode<"UNZIP", SDTIntUnaryOp>;
def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>;
// Scalar cryptography
-def clmul : SDNode<"CLMUL", SDTIntBinOp>;
+def clmul : RVSDNode<"CLMUL", SDTIntBinOp>;
def riscv_clmul : RVSDNode<"CLMUL", SDTIntBinOp>;
def riscv_clmulh : RVSDNode<"CLMULH", SDTIntBinOp>;
def riscv_clmulr : RVSDNode<"CLMULR", SDTIntBinOp>;
>From edfe0b1f54fd80dd97d4bd31aa833449c137c01a Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Tue, 20 May 2025 14:26:04 -0400
Subject: [PATCH 06/11] remvoe int_vp_clmul
---
llvm/include/llvm/IR/Intrinsics.td | 6 ------
1 file changed, 6 deletions(-)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 653ec1c0bf076..f1220e438823c 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2100,12 +2100,6 @@ let IntrProperties = [IntrNoMem] in {
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
- def int_vp_clmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
def int_vp_sadd_sat : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
>From 2bf56aec5a43ff36fd06c1458b28cbc06ab32c5f Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Tue, 20 May 2025 21:39:09 -0400
Subject: [PATCH 07/11] finish hooking up CLMUL to selectiondag?
---
llvm/docs/LangRef.rst | 2 +-
llvm/include/llvm/IR/Intrinsics.td | 2 +-
llvm/lib/CodeGen/IntrinsicLowering.cpp | 2 +-
.../SelectionDAG/LegalizeIntegerTypes.cpp | 36 ++++++++++++++++++-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 3 ++
.../CodeGen/SelectionDAG/TargetLowering.cpp | 20 +++++------
7 files changed, 50 insertions(+), 16 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 514173a6d5618..5b3a1a54ced62 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18119,7 +18119,7 @@ Semantics:
""""""""""
The ‘llvm.clmul’ intrinsic computes carryless multiply of ``%a`` and ``%b``, which is the result
-of applying the standard multiplication algorithm if you replace all of the aditions with exclusive ors.
+of applying the standard multiplication algorithm if you replace all of the additions with exclusive ors.
The vector intrinsics, such as llvm.clmul.v4i32, operate on a per-element basis and the element order is not affected.
Examples
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index f1220e438823c..f659d9f19a6d9 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1427,7 +1427,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
}
let IntrProperties = [IntrNoMem, IntrSpeculatable,
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index a8c85469086b8..9111790e0193b 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -208,7 +208,7 @@ static Value *LowerCLMUL(LLVMContext &Context, Value *V1, Value *V2, Instruction
Value *Res = ConstantInt::get(V1->getType(), 0);
Value *Zero = ConstantInt::get(V1->getType(), 0);
Value *One = ConstantInt::get(V1->getType(), 1);
- for (unsigned I = 1; I < BitSize; I ++) {
+ for (unsigned I = 1; I < BitSize; I++) {
Value *LowBit = Builder.CreateAnd(V1, One, "clmul.isodd");
Value *Pred = Builder.CreateSelect(LowBit, V2, Zero, "clmul.V2_or_zero");
Res = Builder.CreateXor(Res, Pred, "clmul.Res");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 2bcca91f6f81a..f5047340b0418 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -207,7 +207,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_XOR:
case ISD::VP_ADD:
case ISD::VP_SUB:
- case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::VP_MUL:
+ case ISD::CLMUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::ABDS:
case ISD::AVGCEILS:
@@ -3100,6 +3101,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
ExpandIntRes_FunnelShift(N, Lo, Hi);
break;
+ case ISD::CLMUL:
+ ExpandIntRes_CLMUL(N, Lo, Hi);
+ break;
+
case ISD::VSCALE:
ExpandIntRes_VSCALE(N, Lo, Hi);
break;
@@ -5427,6 +5432,35 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
}
+void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Values numbered from least significant to most significant.
+ SDValue In1, In2, In3, In4;
+ GetExpandedInteger(N->getOperand(0), In3, In4);
+ GetExpandedInteger(N->getOperand(1), In1, In2);
+ EVT HalfVT = In1.getValueType();
+ SDLoc DL(N);
+
+ // CLMUL is carryless so Lo is computed from the low half
+ Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, In1, In3);
+ // the high bits not included in CLMUL(A,B) can be computed by
+ // BITREVERSE(CLMUL(BITREVERSE(A), BITREVERSE(B))) >> 1
+ // Therefore we can compute the 2 hi/lo cross products
+ // and the the overflow of the low product
+ // and xor them together to compute HI
+ SDValue BitRevIn1 = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, In1);
+ SDValue BitRevIn3 = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, In3);
+ SDValue BitRevLoHi = DAG.getNode(ISD::CLMUL, DL, HalfVT, BitRevIn1, BitRevIn3);
+ SDValue LoHi = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, BitRevLoHi);
+ SDValue One = DAG.getConstant(0, DL, HalfVT);
+ Hi = DAG.getNode(ISD::SRL, DL, HalfVT, LoHi, One);
+
+ SDValue HITMP = DAG.getNode(ISD::CLMUL, DL, HalfVT, In1, In4);
+ Hi = DAG.getNode(ISD::XOR, DL, HalfVT, Hi, HITMP);
+ HITMP = DAG.getNode(ISD::CLMUL, DL, HalfVT, In2, In3);
+ Hi = DAG.getNode(ISD::XOR, DL, HalfVT, Hi, HITMP);
+}
+
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT VT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index dd9af47da5287..f553105d744cf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -509,6 +509,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CLMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4d844f0036a75..a72f82ac0d671 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -166,6 +166,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::CLMUL:
case ISD::SADDSAT:
case ISD::UADDSAT:
@@ -1330,6 +1331,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX: case ISD::VP_SMAX:
case ISD::UMIN: case ISD::VP_UMIN:
case ISD::UMAX: case ISD::VP_UMAX:
+ case ISD::CLMUL:
case ISD::SADDSAT: case ISD::VP_SADDSAT:
case ISD::UADDSAT: case ISD::VP_UADDSAT:
case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
@@ -4689,6 +4691,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
case ISD::SSHLSAT:
case ISD::USHLSAT:
+ case ISD::CLMUL:
case ISD::ROTL:
case ISD::ROTR:
case ISD::AVGFLOORS:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7fd29afc314aa..9dbc821ddac29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8154,26 +8154,22 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node,
!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::XOR, VT) ||
!isOperationLegalOrCustom(ISD::AND, VT) ||
- !isOperationLegalOrCustom(ISD::SELECT, VT) ||
- !isOperationLegalOrCustomOrPromote(ISD::OR, VT))))
+ !isOperationLegalOrCustom(ISD::SELECT, VT))))
return SDValue();
SDValue Res = DAG.getConstant(0, DL, VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
- for (unsigned i = 0; i < NumBitsPerElt-1; ++i) {
+ for (unsigned I = 0; I < NumBitsPerElt-1; ++I) {
SDValue LowBit = DAG.getNode(ISD::AND, DL, VT, V1, One);
- SDValue LowBool = DAG.getSetCC(DL, SetCCType, LowBit, One, ISD::SETULT);
+ SDValue LowBool = DAG.getSetCC(DL, SetCCType, LowBit, Zero, ISD::SETNE);
SDValue Pred = DAG.getNode(ISD::SELECT, DL, VT, LowBool, V2, Zero);
Res = DAG.getNode(ISD::XOR, DL, VT, Res, Pred);
- V1 = DAG.getNode(ISD::SRL, DL, VT, V1, One);
- V2 = DAG.getNode(ISD::SHL, DL, VT, V2, One);
- }
- // unroll last iteration to prevent dead nodes
- SDValue LowBit = DAG.getNode(ISD::AND, DL, VT, V1, One);
- SDValue LowBool = DAG.getSetCC(DL, SetCCType, LowBit, One, ISD::SETULT);
- SDValue Pred = DAG.getNode(ISD::SELECT, DL, VT, LowBool, V2, Zero);
- Res = DAG.getNode(ISD::XOR, DL, VT, Res, Pred);
+ if (I != NumBitsPerElt) {
+ V1 = DAG.getNode(ISD::SRL, DL, VT, V1, One);
+ V2 = DAG.getNode(ISD::SHL, DL, VT, V2, One);
+ }
+ }
return Res;
}
>From 2117cbd332932a6ccb180078b91dbdf1f20a8da1 Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscar.smith at juliacomputing.com>
Date: Wed, 21 May 2025 21:31:17 +0000
Subject: [PATCH 08/11] cleanup
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 +--
.../SelectionDAG/LegalizeIntegerTypes.cpp | 22 +++++++++----------
.../SelectionDAG/SelectionDAGBuilder.cpp | 6 ++++-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++--
4 files changed, 19 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 8dee5d1d769b0..2758f6f6349d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3908,8 +3908,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Expanded);
break;
case ISD::CLMUL:
- if (SDValue Expanded = TLI.expandCLMUL(Node, DAG))
- Results.push_back(Expanded);
+ Results.push_back(TLI.expandCLMUL(Node, DAG));
break;
case ISD::ROTL:
case ISD::ROTR:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f5047340b0418..f145aabc00065 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -5435,29 +5435,29 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// Values numbered from least significant to most significant.
- SDValue In1, In2, In3, In4;
- GetExpandedInteger(N->getOperand(0), In3, In4);
- GetExpandedInteger(N->getOperand(1), In1, In2);
- EVT HalfVT = In1.getValueType();
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ EVT HalfVT = LL.getValueType();
SDLoc DL(N);
// CLMUL is carryless so Lo is computed from the low half
- Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, In1, In3);
+ Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, LL, RL);
// the high bits not included in CLMUL(A,B) can be computed by
// BITREVERSE(CLMUL(BITREVERSE(A), BITREVERSE(B))) >> 1
// Therefore we can compute the 2 hi/lo cross products
// and the the overflow of the low product
// and xor them together to compute HI
- SDValue BitRevIn1 = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, In1);
- SDValue BitRevIn3 = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, In3);
- SDValue BitRevLoHi = DAG.getNode(ISD::CLMUL, DL, HalfVT, BitRevIn1, BitRevIn3);
+ SDValue BitRevLL = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, LL);
+ SDValue BitRevRL = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, RL);
+ SDValue BitRevLoHi = DAG.getNode(ISD::CLMUL, DL, HalfVT, BitRevLL, BitRevRL);
SDValue LoHi = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, BitRevLoHi);
- SDValue One = DAG.getConstant(0, DL, HalfVT);
+ SDValue One = DAG.getShiftAmountConstant(1, HalfVT, DL);
Hi = DAG.getNode(ISD::SRL, DL, HalfVT, LoHi, One);
- SDValue HITMP = DAG.getNode(ISD::CLMUL, DL, HalfVT, In1, In4);
+ SDValue HITMP = DAG.getNode(ISD::CLMUL, DL, HalfVT, LL, RH);
Hi = DAG.getNode(ISD::XOR, DL, HalfVT, Hi, HITMP);
- HITMP = DAG.getNode(ISD::CLMUL, DL, HalfVT, In2, In3);
+ HITMP = DAG.getNode(ISD::CLMUL, DL, HalfVT, LH, RL);
Hi = DAG.getNode(ISD::XOR, DL, HalfVT, Hi, HITMP);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c705f9c565bcf..6a7e87cea4bd8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7188,7 +7188,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::clmul: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
- setValue(&I, DAG.getNode(ISD::CLMUL, sdl, Op1.getValueType(), Op1, Op2));
+ EVT VT = Op1.getValueType();
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(Op1.getValueType() == Op2.getValueType() &&
+ Op1.getValueType() == VT && "Binary operator types must match!");
+ setValue(&I, DAG.getNode(ISD::CLMUL, sdl, VT, Op1, Op2));
return;
}
case Intrinsic::sadd_sat: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9dbc821ddac29..7585798f95710 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8160,12 +8160,12 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node,
SDValue Res = DAG.getConstant(0, DL, VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
- for (unsigned I = 0; I < NumBitsPerElt-1; ++I) {
+ for (unsigned I = 0; I < NumBitsPerElt; ++I) {
SDValue LowBit = DAG.getNode(ISD::AND, DL, VT, V1, One);
SDValue LowBool = DAG.getSetCC(DL, SetCCType, LowBit, Zero, ISD::SETNE);
SDValue Pred = DAG.getNode(ISD::SELECT, DL, VT, LowBool, V2, Zero);
Res = DAG.getNode(ISD::XOR, DL, VT, Res, Pred);
- if (I != NumBitsPerElt) {
+ if (I != NumBitsPerElt-1) {
V1 = DAG.getNode(ISD::SRL, DL, VT, V1, One);
V2 = DAG.getNode(ISD::SHL, DL, VT, V2, One);
}
>From cdb40cd4431b7b2b2e00d926ea976c5757b0a5b1 Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscar.smith at juliacomputing.com>
Date: Wed, 21 May 2025 21:38:19 +0000
Subject: [PATCH 09/11] scalarize vector clmul on error
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7585798f95710..2ac3c4ae90a3b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8149,13 +8149,15 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node,
EVT SetCCType =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// Only expand vector types if we have the appropriate vector bit operations.
+ // FIXME: Should really try to split the vector in case it's legal on a
+ // subvector.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::XOR, VT) ||
!isOperationLegalOrCustom(ISD::AND, VT) ||
!isOperationLegalOrCustom(ISD::SELECT, VT))))
- return SDValue();
+ return DAG.UnrollVectorOp(Node);
SDValue Res = DAG.getConstant(0, DL, VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
>From 652340d740423a800602b86ef3d2ecdfacab350a Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Wed, 21 May 2025 19:57:37 -0400
Subject: [PATCH 10/11] address review
---
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 1 +
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 +
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 +-----
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 7 ++++---
4 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f145aabc00065..541f92e04b188 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -5448,6 +5448,7 @@ void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo,
// Therefore we can compute the 2 hi/lo cross products
// and the overflow of the low product
// and xor them together to compute HI
+ // TODO: If the target supports a widening CLMUL or a CLMULH, we should probably use that.
SDValue BitRevLL = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, LL);
SDValue BitRevRL = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, RL);
SDValue BitRevLoHi = DAG.getNode(ISD::CLMUL, DL, HalfVT, BitRevLL, BitRevRL);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1506bc4ee187d..be0e745ebd14f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7393,6 +7393,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SSUBSAT:
case ISD::UADDSAT:
case ISD::USUBSAT:
+ case ISD::CLMUL:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6a7e87cea4bd8..c705f9c565bcf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7188,11 +7188,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::clmul: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
- EVT VT = Op1.getValueType();
- assert(VT.isInteger() && "This operator does not apply to FP types!");
- assert(Op1.getValueType() == Op2.getValueType() &&
- Op1.getValueType() == VT && "Binary operator types must match!");
- setValue(&I, DAG.getNode(ISD::CLMUL, sdl, VT, Op1, Op2));
+ setValue(&I, DAG.getNode(ISD::CLMUL, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::sadd_sat: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2ac3c4ae90a3b..14f5f4b2e2909 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8162,14 +8162,15 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node,
SDValue Res = DAG.getConstant(0, DL, VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue OneForShift = DAG.getShiftAmountConstant(1, VT, DL);
for (unsigned I = 0; I < NumBitsPerElt; ++I) {
SDValue LowBit = DAG.getNode(ISD::AND, DL, VT, V1, One);
SDValue LowBool = DAG.getSetCC(DL, SetCCType, LowBit, Zero, ISD::SETNE);
SDValue Pred = DAG.getNode(ISD::SELECT, DL, VT, LowBool, V2, Zero);
Res = DAG.getNode(ISD::XOR, DL, VT, Res, Pred);
- if (I != NumBitsPerElt-1) {
- V1 = DAG.getNode(ISD::SRL, DL, VT, V1, One);
- V2 = DAG.getNode(ISD::SHL, DL, VT, V2, One);
+ if (I != NumBitsPerElt - 1) {
+ V1 = DAG.getNode(ISD::SRL, DL, VT, V1, OneForShift);
+ V2 = DAG.getNode(ISD::SHL, DL, VT, V2, OneForShift);
}
}
return Res;
>From 14c2226c66b7b1e776a6608bdd8a41728c027396 Mon Sep 17 00:00:00 2001
From: Oscar Smith <oscardssmith at gmail.com>
Date: Sat, 31 May 2025 15:49:15 -0400
Subject: [PATCH 11/11] format
---
.../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 541f92e04b188..b465e0c29235c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -5433,7 +5433,7 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
}
void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+ SDValue &Hi) {
// LL/LH are the low/high halves of operand 0; RL/RH are the low/high halves of operand 1.
SDValue LL, LH, RL, RH;
GetExpandedInteger(N->getOperand(0), LL, LH);
@@ -5441,9 +5441,10 @@ void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo,
EVT HalfVT = LL.getValueType();
SDLoc DL(N);
- // CLMUL is carryless so Lo is computed from the low half
+ // Lo is computed from the low half
Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, LL, RL);
- // the high bits not included in CLMUL(A,B) can be computed by
+ // CLMUL is carryless so the high bits not included in CLMUL(A,B)
+ // can be computed by
// BITREVERSE(CLMUL(BITREVERSE(A), BITREVERSE(B))) >> 1
// Therefore we can compute the 2 hi/lo cross products
// and the overflow of the low product
@@ -5456,10 +5457,10 @@ void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo,
SDValue One = DAG.getShiftAmountConstant(1, HalfVT, DL);
Hi = DAG.getNode(ISD::SRL, DL, HalfVT, LoHi, One);
- SDValue HITMP = DAG.getNode(ISD::CLMUL, DL, HalfVT, LL, RH);
- Hi = DAG.getNode(ISD::XOR, DL, HalfVT, Hi, HITMP);
- HITMP = DAG.getNode(ISD::CLMUL, DL, HalfVT, LH, RL);
- Hi = DAG.getNode(ISD::XOR, DL, HalfVT, Hi, HITMP);
+ SDValue HiTmp = DAG.getNode(ISD::CLMUL, DL, HalfVT, LL, RH);
+ Hi = DAG.getNode(ISD::XOR, DL, HalfVT, Hi, HiTmp);
+ HiTmp = DAG.getNode(ISD::CLMUL, DL, HalfVT, LH, RL);
+ Hi = DAG.getNode(ISD::XOR, DL, HalfVT, Hi, HiTmp);
}
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
More information about the llvm-commits
mailing list