[llvm] [DAG] visitCLMUL - fold (clmul x, c_pow2) -> (shl x, log2(c_pow2)) (PR #184049)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 02:23:25 PST 2026
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/184049
>From 44db3c881297c17b14d39c0e2f1dc98650baae9b Mon Sep 17 00:00:00 2001
From: fbrv <fabio.baravalle at gmail.com>
Date: Sun, 1 Mar 2026 23:45:59 +0000
Subject: [PATCH] [DAG] visitCLMUL - fold (clmul x, c_pow2) -> (shl x,
log2(c_pow2))
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 ++++
llvm/test/CodeGen/RISCV/combine-clmul.ll | 3 +-
llvm/test/CodeGen/X86/combine-clmul.ll | 62 ++++---------------
3 files changed, 26 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 41e77e044d8a9..56931f0087f53 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11962,6 +11962,18 @@ SDValue DAGCombiner::visitCLMUL(SDNode *N) {
if (isNullConstant(N1) || ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
+ // fold (clmul x, c_pow2) -> (shl x, log2(c_pow2))
+ // This also handles (clmul x, 1) -> x since (shl x, 0) simplifies to x.
+ if (Opcode == ISD::CLMUL) {
+ if (ConstantSDNode *C = isConstOrConstSplat(N1)) {
+ APInt CV = C->getAPIntValue().trunc(VT.getScalarSizeInBits());
+ if (CV.isPowerOf2() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)))
+ return DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getShiftAmountConstant(CV.logBase2(), VT, DL));
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/combine-clmul.ll b/llvm/test/CodeGen/RISCV/combine-clmul.ll
index 35354750ccf80..421ebc807b0ee 100644
--- a/llvm/test/CodeGen/RISCV/combine-clmul.ll
+++ b/llvm/test/CodeGen/RISCV/combine-clmul.ll
@@ -23,8 +23,7 @@ define i64 @combine_i64_cmul_larger() nounwind {
define i64 @combine_i64_cmul_rhs(i64 %x) nounwind {
; CHECK-LABEL: combine_i64_cmul_rhs:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: clmul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: ret
%res = call i64 @llvm.clmul.i64(i64 32, i64 %x)
ret i64 %res
diff --git a/llvm/test/CodeGen/X86/combine-clmul.ll b/llvm/test/CodeGen/X86/combine-clmul.ll
index da61c7243736e..1581f7cfc64e2 100644
--- a/llvm/test/CodeGen/X86/combine-clmul.ll
+++ b/llvm/test/CodeGen/X86/combine-clmul.ll
@@ -18,61 +18,25 @@ define i32 @clmul_i32_zero(i32 %a) {
; Test with constant 1 - should optimize to just returning %a
define i32 @clmul_i32_one(i32 %a) {
-; SCALAR-LABEL: clmul_i32_one:
-; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: retq
-;
-; SSE-PCLMUL-LABEL: clmul_i32_one:
-; SSE-PCLMUL: # %bb.0:
-; SSE-PCLMUL-NEXT: movl $1, %eax
-; SSE-PCLMUL-NEXT: movq %rax, %xmm0
-; SSE-PCLMUL-NEXT: movd %edi, %xmm1
-; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
-; SSE-PCLMUL-NEXT: movq %xmm1, %rax
-; SSE-PCLMUL-NEXT: # kill: def $eax killed $eax killed $rax
-; SSE-PCLMUL-NEXT: retq
-;
-; AVX-LABEL: clmul_i32_one:
-; AVX: # %bb.0:
-; AVX-NEXT: movl $1, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vmovd %edi, %xmm1
-; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX-NEXT: retq
+; CHECK-LABEL: clmul_i32_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
%res = call i32 @llvm.clmul.i32(i32 %a, i32 1)
ret i32 %res
}
; Test with power of 2 - should become a shift
define i32 @clmul_i32_pow2(i32 %a) {
-; SCALAR-LABEL: clmul_i32_pow2:
-; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl %edi, %eax
-; SCALAR-NEXT: shll $4, %eax
-; SCALAR-NEXT: retq
-;
-; SSE-PCLMUL-LABEL: clmul_i32_pow2:
-; SSE-PCLMUL: # %bb.0:
-; SSE-PCLMUL-NEXT: movl $16, %eax
-; SSE-PCLMUL-NEXT: movq %rax, %xmm0
-; SSE-PCLMUL-NEXT: movd %edi, %xmm1
-; SSE-PCLMUL-NEXT: pclmulqdq $0, %xmm0, %xmm1
-; SSE-PCLMUL-NEXT: movq %xmm1, %rax
-; SSE-PCLMUL-NEXT: # kill: def $eax killed $eax killed $rax
-; SSE-PCLMUL-NEXT: retq
-;
-; AVX-LABEL: clmul_i32_pow2:
-; AVX: # %bb.0:
-; AVX-NEXT: movl $16, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vmovd %edi, %xmm1
-; AVX-NEXT: vpclmulqdq $0, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX-NEXT: retq
+; CHECK-LABEL: clmul_i32_pow2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shll $4, %eax
+; CHECK-NEXT: retq
%res = call i32 @llvm.clmul.i32(i32 %a, i32 16) ; 0x10 = 1 << 4
ret i32 %res
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SCALAR: {{.*}}
+; SSE-PCLMUL: {{.*}}
More information about the llvm-commits mailing list