[llvm] [DAG] Constant fold ISD::FSHL/FSHR nodes (PR #154480)

Fri Aug 22 03:45:29 PDT 2025

https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/154480

>From de442b659229e6da60a9d870a16993b153ed58c1 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 20 Aug 2025 14:10:12 +0800
Subject: [PATCH 1/5] [SelectionDAG] NFC. Add constant fold testcases for
 fshl/fshr

---
 llvm/test/CodeGen/X86/fshl-fshr-constant.ll | 158 ++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/fshl-fshr-constant.ll

diff --git a/llvm/test/CodeGen/X86/fshl-fshr-constant.ll b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
new file mode 100644
index 0000000000000..022af2be316dc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-EXPAND
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,CHECK-UNEXPAND
+
+define <4 x i32> @test_fshl_constants() {
+; CHECK-EXPAND-LABEL: test_fshl_constants:
+; CHECK-EXPAND:       # %bb.0:
+; CHECK-EXPAND-NEXT:    vmovaps {{.*#+}} xmm0 = [0,512,2048,6144]
+; CHECK-EXPAND-NEXT:    retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_constants:
+; CHECK-UNEXPAND:       # %bb.0:
+; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [0,1,2,3]
+; CHECK-UNEXPAND-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_splat_constants() {
+; CHECK-LABEL: test_fshl_splat_constants:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm0 = [256,256,256,256]
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_two_constants(<4 x i32> %a) {
+; CHECK-EXPAND-LABEL: test_fshl_two_constants:
+; CHECK-EXPAND:       # %bb.0:
+; CHECK-EXPAND-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_two_constants:
+; CHECK-UNEXPAND:       # %bb.0:
+; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_one_constant(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-EXPAND-LABEL: test_fshl_one_constant:
+; CHECK-EXPAND:       # %bb.0:
+; CHECK-EXPAND-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-EXPAND-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_one_constant:
+; CHECK-UNEXPAND:       # %bb.0:
+; CHECK-UNEXPAND-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshl_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-EXPAND-LABEL: test_fshl_none_constant:
+; CHECK-EXPAND:       # %bb.0:
+; CHECK-EXPAND-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; CHECK-EXPAND-NEXT:    vpandn %xmm3, %xmm2, %xmm4
+; CHECK-EXPAND-NEXT:    vpsrld $1, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT:    vpsrlvd %xmm4, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; CHECK-EXPAND-NEXT:    vpsllvd %xmm2, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshl_none_constant:
+; CHECK-UNEXPAND:       # %bb.0:
+; CHECK-UNEXPAND-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_constants() {
+; CHECK-EXPAND-LABEL: test_fshr_constants:
+; CHECK-EXPAND:       # %bb.0:
+; CHECK-EXPAND-NEXT:    vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
+; CHECK-EXPAND-NEXT:    retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_constants:
+; CHECK-UNEXPAND:       # %bb.0:
+; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [0,1,2,3]
+; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT:    vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_two_constants(<4 x i32> %a) {
+; CHECK-EXPAND-LABEL: test_fshr_two_constants:
+; CHECK-EXPAND:       # %bb.0:
+; CHECK-EXPAND-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_two_constants:
+; CHECK-UNEXPAND:       # %bb.0:
+; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
+; CHECK-UNEXPAND-NEXT:    vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT:    vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_one_constant(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-EXPAND-LABEL: test_fshr_one_constant:
+; CHECK-EXPAND:       # %bb.0:
+; CHECK-EXPAND-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-EXPAND-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_one_constant:
+; CHECK-UNEXPAND:       # %bb.0:
+; CHECK-UNEXPAND-NEXT:    vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT:    vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-EXPAND-LABEL: test_fshr_none_constant:
+; CHECK-EXPAND:       # %bb.0:
+; CHECK-EXPAND-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; CHECK-EXPAND-NEXT:    vpand %xmm3, %xmm2, %xmm4
+; CHECK-EXPAND-NEXT:    vpsrlvd %xmm4, %xmm1, %xmm1
+; CHECK-EXPAND-NEXT:    vpandn %xmm3, %xmm2, %xmm2
+; CHECK-EXPAND-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    vpsllvd %xmm2, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-EXPAND-NEXT:    retq
+;
+; CHECK-UNEXPAND-LABEL: test_fshr_none_constant:
+; CHECK-UNEXPAND:       # %bb.0:
+; CHECK-UNEXPAND-NEXT:    vpshrdvd %xmm2, %xmm0, %xmm1
+; CHECK-UNEXPAND-NEXT:    vmovdqa %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_fshr_splat_constants() {
+; CHECK-LABEL: test_fshr_splat_constants:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm0 = [16777216,16777216,16777216,16777216]
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
+  ret <4 x i32> %res
+}

>From d1a1e48806e25cc84236cf7458e5c9196dd059a5 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 20 Aug 2025 14:11:51 +0800
Subject: [PATCH 2/5] [SelectionDAG] Constant fold ISD::FSHL/FSHR nodes

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  5 ++++
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 28 +++++++++++++++++++
 llvm/test/CodeGen/X86/fshl-fshr-constant.ll   | 19 ++++---------
 3 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cee593def653c..2896c3a583f7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11281,6 +11281,11 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
   unsigned BitWidth = VT.getScalarSizeInBits();
   SDLoc DL(N);
 
+  // fold (fshl/fshr C0, C1, C2) -> C3
+  if (SDValue C =
+          DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
+    return C;
+
   // fold (fshl N0, N1, 0) -> N0
   // fold (fshr N0, N1, 0) -> N1
   if (isPowerOf2_32(BitWidth))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 531297bfa9a08..53579bfd81fce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7175,6 +7175,28 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     }
   }
 
+  // Handle fshl/fshr special cases.
+  if (Opcode == ISD::FSHL || Opcode == ISD::FSHR) {
+    auto *C1 = dyn_cast<ConstantSDNode>(Ops[0]);
+    auto *C2 = dyn_cast<ConstantSDNode>(Ops[1]);
+    auto *C3 = dyn_cast<ConstantSDNode>(Ops[2]);
+
+    if (C1 && C2 && C3) {
+      if (C1->isOpaque() || C2->isOpaque() || C3->isOpaque())
+        return SDValue();
+      const APInt V1 = C1->getAPIntValue(), V2 = C2->getAPIntValue(),
+                  V3 = C3->getAPIntValue();
+
+      APInt FoldedVal = Opcode == ISD::FSHL ? APIntOps::fshl(V1, V2, V3)
+                                            : APIntOps::fshr(V1, V2, V3);
+
+      SDValue Folded = getConstant(FoldedVal, DL, VT);
+      assert((!Folded || !VT.isVector()) &&
+             "Can't fold vectors ops with scalar operands");
+      return Folded;
+    }
+  }
+
   // This is for vector folding only from here on.
   if (!VT.isVector())
     return SDValue();
@@ -8158,6 +8180,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     }
     break;
   }
+  case ISD::FSHL:
+  case ISD::FSHR:
+    // Constant folding.
+    if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2, N3}))
+      return V;
+    break;
   case ISD::BUILD_VECTOR: {
     // Attempt to simplify BUILD_VECTOR.
     SDValue Ops[] = {N1, N2, N3};
diff --git a/llvm/test/CodeGen/X86/fshl-fshr-constant.ll b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
index 022af2be316dc..fdc34f5665b2b 100644
--- a/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
+++ b/llvm/test/CodeGen/X86/fshl-fshr-constant.ll
@@ -10,9 +10,7 @@ define <4 x i32> @test_fshl_constants() {
 ;
 ; CHECK-UNEXPAND-LABEL: test_fshl_constants:
 ; CHECK-UNEXPAND:       # %bb.0:
-; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
-; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [0,1,2,3]
-; CHECK-UNEXPAND-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-UNEXPAND-NEXT:    vpmovsxwd {{.*#+}} xmm0 = [0,512,2048,6144]
 ; CHECK-UNEXPAND-NEXT:    retq
   %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
   ret <4 x i32> %res
@@ -79,17 +77,10 @@ define <4 x i32> @test_fshl_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 }
 
 define <4 x i32> @test_fshr_constants() {
-; CHECK-EXPAND-LABEL: test_fshr_constants:
-; CHECK-EXPAND:       # %bb.0:
-; CHECK-EXPAND-NEXT:    vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
-; CHECK-EXPAND-NEXT:    retq
-;
-; CHECK-UNEXPAND-LABEL: test_fshr_constants:
-; CHECK-UNEXPAND:       # %bb.0:
-; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [0,1,2,3]
-; CHECK-UNEXPAND-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4,5,6,7]
-; CHECK-UNEXPAND-NEXT:    vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
-; CHECK-UNEXPAND-NEXT:    retq
+; CHECK-LABEL: test_fshr_constants:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
+; CHECK-NEXT:    retq
   %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
   ret <4 x i32> %res
 }

>From 189286212ab230cbceaf6194df54efee91e76742 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Fri, 22 Aug 2025 02:24:08 +0800
Subject: [PATCH 3/5] Refactor constant folding of ternary arithmetic operators

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 68 ++++++++++---------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 53579bfd81fce..f38c1b2a909b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7197,6 +7197,27 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     }
   }
 
+  // Handle fma/fmad special cases.
+  if (Opcode == ISD::FMA || Opcode == ISD::FMAD) {
+    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+    assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
+           Ops[2].getValueType() == VT && "FMA types must match!");
+    ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(Ops[0]);
+    ConstantFPSDNode *C2 = dyn_cast<ConstantFPSDNode>(Ops[1]);
+    ConstantFPSDNode *C3 = dyn_cast<ConstantFPSDNode>(Ops[2]);
+    if (C1 && C2 && C3) {
+      APFloat V1 = C1->getValueAPF();
+      const APFloat &V2 = C2->getValueAPF();
+      const APFloat &V3 = C3->getValueAPF();
+      if (Opcode == ISD::FMAD) {
+        V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        V1.add(V3, APFloat::rmNearestTiesToEven);
+      } else
+        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+      return getConstantFP(V1, DL, VT);
+    }
+  }
+
   // This is for vector folding only from here on.
   if (!VT.isVector())
     return SDValue();
@@ -8159,33 +8180,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
          "Operand is DELETED_NODE!");
   // Perform various simplifications.
   switch (Opcode) {
-  case ISD::FMA:
-  case ISD::FMAD: {
-    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
-    assert(N1.getValueType() == VT && N2.getValueType() == VT &&
-           N3.getValueType() == VT && "FMA types must match!");
-    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-    ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
-    ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
-    if (N1CFP && N2CFP && N3CFP) {
-      APFloat  V1 = N1CFP->getValueAPF();
-      const APFloat &V2 = N2CFP->getValueAPF();
-      const APFloat &V3 = N3CFP->getValueAPF();
-      if (Opcode == ISD::FMAD) {
-        V1.multiply(V2, APFloat::rmNearestTiesToEven);
-        V1.add(V3, APFloat::rmNearestTiesToEven);
-      } else
-        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
-      return getConstantFP(V1, DL, VT);
-    }
-    break;
-  }
-  case ISD::FSHL:
-  case ISD::FSHR:
-    // Constant folding.
-    if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2, N3}))
-      return V;
-    break;
   case ISD::BUILD_VECTOR: {
     // Attempt to simplify BUILD_VECTOR.
     SDValue Ops[] = {N1, N2, N3};
@@ -8211,12 +8205,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     // Use FoldSetCC to simplify SETCC's.
     if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
       return V;
-    // Vector constant folding.
-    SDValue Ops[] = {N1, N2, N3};
-    if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
-      NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
-      return V;
-    }
     break;
   }
   case ISD::SELECT:
@@ -8352,6 +8340,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   }
   }
 
+  // Perform trivial constant folding for arithmetic operators.
+  switch (Opcode) {
+  case ISD::FMA:
+  case ISD::FMAD:
+  case ISD::SETCC:
+  case ISD::BITCAST:
+  case ISD::FSHL:
+  case ISD::FSHR:
+    if (SDValue SV =
+            FoldConstantArithmetic(Opcode, DL, VT, {N1, N2, N3}, Flags))
+      return SV;
+    break;
+  }
+
   // Memoize node if it doesn't produce a glue result.
   SDNode *N;
   SDVTList VTs = getVTList(VT);

>From 8b607f9e1094c342f18c7b2c4ffb9107e58d1b10 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Fri, 22 Aug 2025 18:24:16 +0800
Subject: [PATCH 4/5] add const reference

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f38c1b2a909b8..1468d61b76c43 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7184,8 +7184,8 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     if (C1 && C2 && C3) {
       if (C1->isOpaque() || C2->isOpaque() || C3->isOpaque())
         return SDValue();
-      const APInt V1 = C1->getAPIntValue(), V2 = C2->getAPIntValue(),
-                  V3 = C3->getAPIntValue();
+      const APInt &V1 = C1->getAPIntValue(), &V2 = C2->getAPIntValue(),
+                  &V3 = C3->getAPIntValue();
 
       APInt FoldedVal = Opcode == ISD::FSHL ? APIntOps::fshl(V1, V2, V3)
                                             : APIntOps::fshr(V1, V2, V3);

>From aa2f22565e791323e0387651ed3e55c6137c5aa1 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Fri, 22 Aug 2025 18:45:11 +0800
Subject: [PATCH 5/5] remove assertion

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1468d61b76c43..47645a10db7f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7189,11 +7189,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
 
       APInt FoldedVal = Opcode == ISD::FSHL ? APIntOps::fshl(V1, V2, V3)
                                             : APIntOps::fshr(V1, V2, V3);
-
-      SDValue Folded = getConstant(FoldedVal, DL, VT);
-      assert((!Folded || !VT.isVector()) &&
-             "Can't fold vectors ops with scalar operands");
-      return Folded;
+      return getConstant(FoldedVal, DL, VT);
     }
   }