[llvm] [DAG] Constant fold ISD::FSHL/FSHR nodes (PR #154480)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 21 09:53:29 PDT 2025
https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/154480
>From de442b659229e6da60a9d870a16993b153ed58c1 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 20 Aug 2025 14:10:12 +0800
Subject: [PATCH 1/2] [SelectionDAG] NFC. Add constant fold testcases for
fshl/fshr
---
llvm/test/CodeGen/X86/fshl-fshr-constant.ll | 158 ++++++++++++++++++++
1 file changed, 158 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/fshl-fshr-constant.ll
diff --git a/llvm/test/CodeGen/X86/fshl-fshr-constant.ll b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
new file mode 100644
index 0000000000000..022af2be316dc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-EXPAND
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,CHECK-UNEXPAND
+
+define <4 x i32> @test_fshl_constants() {
+; CHECK-EXPAND-LABEL: test_fshl_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vmovaps {{.*#+}} xmm0 = [0,512,2048,6144]
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,1,2,3]
+; CHECK-UNEXPAND-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_splat_constants() {
+; CHECK-LABEL: test_fshl_splat_constants:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [256,256,256,256]
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_two_constants(<4 x i32> %a) {
+; CHECK-EXPAND-LABEL: test_fshl_two_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_two_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_one_constant(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-EXPAND-LABEL: test_fshl_one_constant:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_one_constant:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-EXPAND-LABEL: test_fshl_none_constant:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; CHECK-EXPAND-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; CHECK-EXPAND-NEXT: vpsrld $1, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpand %xmm3, %xmm2, %xmm2
+; CHECK-EXPAND-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_none_constant:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpshldvd %xmm2, %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_constants() {
+; CHECK-EXPAND-LABEL: test_fshr_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,1,2,3]
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_two_constants(<4 x i32> %a) {
+; CHECK-EXPAND-LABEL: test_fshr_two_constants:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_two_constants:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_one_constant(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-EXPAND-LABEL: test_fshr_one_constant:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_one_constant:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-EXPAND-LABEL: test_fshr_none_constant:
+; CHECK-EXPAND: # %bb.0:
+; CHECK-EXPAND-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; CHECK-EXPAND-NEXT: vpand %xmm3, %xmm2, %xmm4
+; CHECK-EXPAND-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; CHECK-EXPAND-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT: retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_none_constant:
+; CHECK-UNEXPAND: # %bb.0:
+; CHECK-UNEXPAND-NEXT: vpshrdvd %xmm2, %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_splat_constants() {
+; CHECK-LABEL: test_fshr_splat_constants:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [16777216,16777216,16777216,16777216]
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+ ret <4 x i32> %res
+}
>From d1a1e48806e25cc84236cf7458e5c9196dd059a5 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 20 Aug 2025 14:11:51 +0800
Subject: [PATCH 2/2] [SelectionDAG] Constant fold ISD::FSHL/FSHR nodes
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++++
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 28 +++++++++++++++++++
llvm/test/CodeGen/X86/fshl-fshr-constant.ll | 19 ++++---------
3 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cee593def653c..2896c3a583f7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11281,6 +11281,11 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
unsigned BitWidth = VT.getScalarSizeInBits();
SDLoc DL(N);
+ // fold (fshl/fshr C0, C1, C2) -> C3
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
+ return C;
+
// fold (fshl N0, N1, 0) -> N0
// fold (fshr N0, N1, 0) -> N1
if (isPowerOf2_32(BitWidth))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 531297bfa9a08..53579bfd81fce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7175,6 +7175,28 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
}
+ // Handle fshl/fshr special cases.
+ if (Opcode == ISD::FSHL || Opcode == ISD::FSHR) {
+ auto *C1 = dyn_cast<ConstantSDNode>(Ops[0]);
+ auto *C2 = dyn_cast<ConstantSDNode>(Ops[1]);
+ auto *C3 = dyn_cast<ConstantSDNode>(Ops[2]);
+
+ if (C1 && C2 && C3) {
+ if (C1->isOpaque() || C2->isOpaque() || C3->isOpaque())
+ return SDValue();
+ const APInt V1 = C1->getAPIntValue(), V2 = C2->getAPIntValue(),
+ V3 = C3->getAPIntValue();
+
+ APInt FoldedVal = Opcode == ISD::FSHL ? APIntOps::fshl(V1, V2, V3)
+ : APIntOps::fshr(V1, V2, V3);
+
+ SDValue Folded = getConstant(FoldedVal, DL, VT);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
+ }
+ }
+
// This is for vector folding only from here on.
if (!VT.isVector())
return SDValue();
@@ -8158,6 +8180,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
break;
}
+ case ISD::FSHL:
+ case ISD::FSHR:
+ // Constant folding.
+ if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2, N3}))
+ return V;
+ break;
case ISD::BUILD_VECTOR: {
// Attempt to simplify BUILD_VECTOR.
SDValue Ops[] = {N1, N2, N3};
diff --git a/llvm/test/CodeGen/X86/fshl-fshr-constant.ll b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
index 022af2be316dc..fdc34f5665b2b 100644
--- a/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
+++ b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
@@ -10,9 +10,7 @@ define <4 x i32> @test_fshl_constants() {
;
; CHECK-UNEXPAND-LABEL: test_fshl_constants:
; CHECK-UNEXPAND: # %bb.0:
-; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
-; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,1,2,3]
-; CHECK-UNEXPAND-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT: vpmovsxwd {{.*#+}} xmm0 = [0,512,2048,6144]
; CHECK-UNEXPAND-NEXT: retq
%res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
ret <4 x i32> %res
@@ -79,17 +77,10 @@ define <4 x i32> @test_fshl_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32>
}
define <4 x i32> @test_fshr_constants() {
-; CHECK-EXPAND-LABEL: test_fshr_constants:
-; CHECK-EXPAND: # %bb.0:
-; CHECK-EXPAND-NEXT: vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
-; CHECK-EXPAND-NEXT: retq
-;
-; CHECK-UNEXPAND-LABEL: test_fshr_constants:
-; CHECK-UNEXPAND: # %bb.0:
-; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,1,2,3]
-; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4,5,6,7]
-; CHECK-UNEXPAND-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
-; CHECK-UNEXPAND-NEXT: retq
+; CHECK-LABEL: test_fshr_constants:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
+; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
ret <4 x i32> %res
}
More information about the llvm-commits
mailing list