[llvm] X86: make VBMI2 funnel shifts use VSHLD/VSHRD for const splats (PR #169401)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 25 06:39:16 PST 2025


https://github.com/ArnavM3434 updated https://github.com/llvm/llvm-project/pull/169401

>From a8b832514cca3d08d7058046a869e3d6e8f72a52 Mon Sep 17 00:00:00 2001
From: Arnav Mehta <arnavnmehta1 at gmail.com>
Date: Mon, 24 Nov 2025 12:39:37 -0500
Subject: [PATCH] X86: make VBMI2 funnel shifts use VSHLD/VSHRD for const
 splats

Move constant splat handling for vector funnel shifts into a DAG combiner
so that VBMI2 legal widths emit VSHLD/VSHRD directly (fixes #166949).

Signed-off-by: Arnav Mehta <arnavnmehta1 at gmail.com>
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  55 +++++-
 llvm/unittests/Target/X86/CMakeLists.txt      |   1 +
 .../Target/X86/X86SelectionDAGTest.cpp        | 163 ++++++++++++++++++
 3 files changed, 215 insertions(+), 4 deletions(-)
 create mode 100644 llvm/unittests/Target/X86/X86SelectionDAGTest.cpp

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dc84025c166a3..313c30c9d6ed8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2073,8 +2073,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     if (Subtarget.hasVBMI2()) {
       for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
-        setOperationAction(ISD::FSHL, VT, Custom);
-        setOperationAction(ISD::FSHR, VT, Custom);
+        setOperationAction(ISD::FSHL, VT, Legal);
+        setOperationAction(ISD::FSHR, VT, Legal);
       }
 
       setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
@@ -2089,8 +2089,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
     for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
                     MVT::v4i64}) {
-      setOperationAction(ISD::FSHL, VT, Custom);
-      setOperationAction(ISD::FSHR, VT, Custom);
+      setOperationAction(ISD::FSHL, VT, Subtarget.hasVLX() ? Legal : Custom);
+      setOperationAction(ISD::FSHR, VT, Subtarget.hasVLX() ? Legal : Custom);
     }
   }
 
@@ -2703,6 +2703,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                        ISD::STRICT_FP_EXTEND,
                        ISD::FP_ROUND,
                        ISD::STRICT_FP_ROUND,
+                       ISD::FSHL,
+                       ISD::FSHR,
                        ISD::INTRINSIC_VOID,
                        ISD::INTRINSIC_WO_CHAIN,
                        ISD::INTRINSIC_W_CHAIN});
@@ -57624,6 +57626,49 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Combine vector funnel shifts by a constant splat amount into VSHLD/VSHRD.
+static SDValue combineFunnelShift(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const X86Subtarget &Subtarget) {
+
+  SDLoc DL(N);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  SDValue Amt = N->getOperand(2);
+  EVT VT = Op0.getValueType();
+
+  if (!VT.isVector() || !Subtarget.hasVBMI2())
+    return SDValue();
+
+  // Only combine if the operation is legal for this type.
+  // This ensures we don't try to convert types that need to be
+  // widened/promoted.
+  if (!DAG.getTargetLoweringInfo().isOperationLegal(N->getOpcode(), VT))
+    return SDValue();
+
+  unsigned EltSize = VT.getScalarSizeInBits();
+
+  if (EltSize <= 8)
+    return SDValue();
+
+  APInt ShiftVal;
+  if (!X86::isConstantSplat(Amt, ShiftVal))
+    return SDValue();
+
+  uint64_t ModAmt = ShiftVal.urem(EltSize);
+
+  SDValue Imm = DAG.getTargetConstant(ModAmt, DL, MVT::i8);
+
+  bool IsFSHR = N->getOpcode() == ISD::FSHR;
+
+  if (IsFSHR)
+    std::swap(Op0, Op1);
+
+  unsigned Opcode = IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD;
+
+  return DAG.getNode(Opcode, DL, VT, {Op0, Op1, Imm});
+}
+
 static bool needCarryOrOverflowFlag(SDValue Flags) {
   assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
 
@@ -61228,6 +61273,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INTRINSIC_VOID:  return combineINTRINSIC_VOID(N, DAG, DCI);
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
+  case ISD::FSHL: return combineFunnelShift(N, DAG, DCI, Subtarget);
+  case ISD::FSHR: return combineFunnelShift(N, DAG, DCI, Subtarget);
     // clang-format on
   }
 
diff --git a/llvm/unittests/Target/X86/CMakeLists.txt b/llvm/unittests/Target/X86/CMakeLists.txt
index b011681aa3b95..8d425e8ddd3e9 100644
--- a/llvm/unittests/Target/X86/CMakeLists.txt
+++ b/llvm/unittests/Target/X86/CMakeLists.txt
@@ -22,6 +22,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 add_llvm_unittest(X86Tests
+  X86SelectionDAGTest.cpp
   MachineSizeOptsTest.cpp
   TernlogTest.cpp
   )
diff --git a/llvm/unittests/Target/X86/X86SelectionDAGTest.cpp b/llvm/unittests/Target/X86/X86SelectionDAGTest.cpp
new file mode 100644
index 0000000000000..56521cad56154
--- /dev/null
+++ b/llvm/unittests/Target/X86/X86SelectionDAGTest.cpp
@@ -0,0 +1,163 @@
+//===- X86SelectionDAGTest.cpp - X86 Funnel Shift Combine Tests -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86FunnelShiftCombineTest : public testing::Test {
+protected:
+  static void SetUpTestCase() {
+    LLVMInitializeX86TargetInfo();
+    LLVMInitializeX86Target();
+    LLVMInitializeX86TargetMC();
+  }
+
+  void SetUp() override {
+    Triple TargetTriple("x86_64-unknown-unknown");
+    std::string Error;
+    const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
+    if (!T)
+      GTEST_SKIP();
+
+    TargetOptions Options;
+    // Enable VBMI2 to test funnel shift combines
+    TM = std::unique_ptr<TargetMachine>(T->createTargetMachine(
+        TargetTriple, "", "+avx512f,+avx512vbmi2", Options, std::nullopt,
+        std::nullopt, CodeGenOptLevel::Default));
+    if (!TM)
+      GTEST_SKIP();
+
+    StringRef Assembly = "define void @test() { ret void }";
+    SMDiagnostic SMError;
+    M = parseAssemblyString(Assembly, SMError, Context);
+    ASSERT_TRUE(M && "Could not parse module!");
+    M->setDataLayout(TM->createDataLayout());
+
+    F = M->getFunction("test");
+    ASSERT_TRUE(F && "Could not get function test!");
+
+    MachineModuleInfo MMI(TM.get());
+    MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
+                                           MMI.getContext(), 0);
+
+    DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::Default);
+    ASSERT_TRUE(DAG && "Failed to create SelectionDAG!");
+    OptimizationRemarkEmitter ORE(F);
+    DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
+              nullptr);
+  }
+
+  LLVMContext Context;
+  std::unique_ptr<TargetMachine> TM;
+  std::unique_ptr<Module> M;
+  Function *F;
+  std::unique_ptr<MachineFunction> MF;
+  std::unique_ptr<SelectionDAG> DAG;
+};
+
+// Test that v16i32 is legal for VBMI2 (should be combined)
+TEST_F(X86FunnelShiftCombineTest, TestFSHLv16i32Legal) {
+  MVT VT = MVT::v16i32;
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHL, VT));
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHR, VT));
+}
+
+// Test that v8i64 is legal for VBMI2 (should be combined)
+TEST_F(X86FunnelShiftCombineTest, TestFSHRv8i64Legal) {
+  MVT VT = MVT::v8i64;
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHL, VT));
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHR, VT));
+}
+
+// Test that v2i32 is NOT legal for VBMI2 (should NOT be combined)
+TEST_F(X86FunnelShiftCombineTest, TestFSHLv2i32NonLegal) {
+  MVT VT = MVT::v2i32;
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  EXPECT_FALSE(TLI.isOperationLegal(ISD::FSHL, VT));
+  EXPECT_FALSE(TLI.isOperationLegal(ISD::FSHR, VT));
+}
+
+// Test that v32i16 is legal for VBMI2 (should be combined)
+TEST_F(X86FunnelShiftCombineTest, TestFSHLv32i16Legal) {
+  MVT VT = MVT::v32i16;
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHL, VT));
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHR, VT));
+}
+
+// Test that v8i16 with VLX is legal
+TEST_F(X86FunnelShiftCombineTest, TestFSHLv8i16WithVLX) {
+  Triple TargetTriple("x86_64-unknown-unknown");
+  std::string Error;
+  const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
+  ASSERT_TRUE(T);
+
+  TargetOptions Options;
+  TM = std::unique_ptr<TargetMachine>(T->createTargetMachine(
+      TargetTriple, "", "+avx512f,+avx512vbmi2,+avx512vl", Options,
+      std::nullopt, std::nullopt, CodeGenOptLevel::Default));
+  ASSERT_TRUE(TM);
+
+  MachineModuleInfo MMI(TM.get());
+  MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
+                                         MMI.getContext(), 0);
+  DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::Default);
+  OptimizationRemarkEmitter ORE(F);
+  DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
+            nullptr);
+
+  MVT VT = MVT::v8i16;
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHL, VT));
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHR, VT));
+}
+
+// Test that v4i32 with VLX is legal
+TEST_F(X86FunnelShiftCombineTest, TestFSHLv4i32WithVLX) {
+  Triple TargetTriple("x86_64-unknown-unknown");
+  std::string Error;
+  const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
+  ASSERT_TRUE(T);
+
+  TargetOptions Options;
+  TM = std::unique_ptr<TargetMachine>(T->createTargetMachine(
+      TargetTriple, "", "+avx512f,+avx512vbmi2,+avx512vl", Options,
+      std::nullopt, std::nullopt, CodeGenOptLevel::Default));
+  ASSERT_TRUE(TM);
+
+  MachineModuleInfo MMI(TM.get());
+  MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
+                                         MMI.getContext(), 0);
+  DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::Default);
+  OptimizationRemarkEmitter ORE(F);
+  DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
+            nullptr);
+
+  MVT VT = MVT::v4i32;
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHL, VT));
+  EXPECT_TRUE(TLI.isOperationLegal(ISD::FSHR, VT));
+}
+
+} // namespace



More information about the llvm-commits mailing list