[llvm] 753f47d - [X86] Make VBMI2 funnel shifts use VSHLD/VSHRD for const splats (#169401)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 03:40:06 PST 2025
Author: ArnavM3434
Date: 2025-12-02T11:40:01Z
New Revision: 753f47d6a5043b32f6eebf467cca26f5e1a0611a
URL: https://github.com/llvm/llvm-project/commit/753f47d6a5043b32f6eebf467cca26f5e1a0611a
DIFF: https://github.com/llvm/llvm-project/commit/753f47d6a5043b32f6eebf467cca26f5e1a0611a.diff
LOG: [X86] Make VBMI2 funnel shifts use VSHLD/VSHRD for const splats (#169401)
Make ISD::FSHL/FSHR legal on VBMI2 vector targets and convert constant-splat funnel shifts to VSHLD/VSHRD in a DAG combine.
Closes #166949.
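For reference, ISD::FSHL/FSHR have exactly the concatenate-and-shift semantics that the VBMI2 VSHLD/VSHRD instructions implement per element. A minimal scalar model of the left-shift case for 64-bit lanes (illustrative only; fshl64 is not a name from this patch):

  #include <cstdint>

  // fshl(Hi, Lo, Amt): shift the 128-bit concatenation Hi:Lo left by Amt
  // and keep the high 64 bits.
  static inline uint64_t fshl64(uint64_t Hi, uint64_t Lo, unsigned Amt) {
    Amt %= 64; // funnel-shift amounts are taken modulo the element width
    if (Amt == 0)
      return Hi; // avoid the undefined 64-bit shift by 64 below
    return (Hi << Amt) | (Lo >> (64 - Amt));
  }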
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ed2539d163d11..427c18a4bb576 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2073,8 +2073,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasVBMI2()) {
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Legal);
+ setOperationAction(ISD::FSHR, VT, Legal);
}
setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
@@ -2089,8 +2089,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
MVT::v4i64}) {
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::FSHR, VT, Subtarget.hasVLX() ? Legal : Custom);
}
}
@@ -2709,6 +2709,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::STRICT_FP_EXTEND,
ISD::FP_ROUND,
ISD::STRICT_FP_ROUND,
+ ISD::FSHL,
+ ISD::FSHR,
ISD::INTRINSIC_VOID,
ISD::INTRINSIC_WO_CHAIN,
ISD::INTRINSIC_W_CHAIN});
@@ -31322,19 +31324,15 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
unsigned NumElts = VT.getVectorNumElements();
- if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
-
- if (IsCstSplat) {
- if (IsFSHR)
- std::swap(Op0, Op1);
- uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
- SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
- return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
- {Op0, Op1, Imm}, DAG, Subtarget);
- }
+  // For non-VLX VBMI2 targets, widen 128/256-bit ops to 512 bits so that
+  // the rest of the lowering/isel can select the VBMI2 forms. Only the
+  // Custom types (v8i16, v4i32, v2i64, v16i16, v8i32, v4i64) can reach
+  // LowerFunnelShift with VBMI2 but no VLX, so no type check is needed.
+ if (Subtarget.hasVBMI2() && !Subtarget.hasVLX() && EltSizeInBits > 8) {
return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
{Op0, Op1, Amt}, DAG, Subtarget);
}
+
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
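As a hedged illustration of the non-VLX path above (the compiler flags and the function name shld17 below are assumptions for the example, not taken from the patch): with VBMI2 but without VLX, a 256-bit shift/or pair like this should be recognized as a constant-splat ISD::FSHL and, via the widening above, select the 512-bit VBMI2 form instead of staying as separate shifts:

  #include <immintrin.h>

  // Per 64-bit lane this computes fshl(Hi, Lo, 17). Built with something
  // like clang -O2 -mavx512vbmi2 (VBMI2 without AVX512VL), DAGCombine can
  // match the shl/srl/or pattern as ISD::FSHL with a splat amount.
  __m256i shld17(__m256i Hi, __m256i Lo) {
    return _mm256_or_si256(_mm256_slli_epi64(Hi, 17),
                           _mm256_srli_epi64(Lo, 64 - 17));
  }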
@@ -57637,6 +57635,40 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Combine funnel shifts with a constant-splat shift amount into
+// X86ISD::VSHLD/VSHRD nodes.
+static SDValue combineFunnelShift(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ SDLoc DL(N);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Amt = N->getOperand(2);
+ EVT VT = Op0.getValueType();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ // Only combine if the operation is legal for this type.
+ // This ensures we don't try to convert types that need to be
+ // widened/promoted.
+ if (!DAG.getTargetLoweringInfo().isOperationLegal(N->getOpcode(), VT))
+ return SDValue();
+
+ unsigned EltSize = VT.getScalarSizeInBits();
+ APInt ShiftVal;
+ if (!X86::isConstantSplat(Amt, ShiftVal))
+ return SDValue();
+
+ uint64_t ModAmt = ShiftVal.urem(EltSize);
+ SDValue Imm = DAG.getTargetConstant(ModAmt, DL, MVT::i8);
+ bool IsFSHR = N->getOpcode() == ISD::FSHR;
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+ unsigned Opcode = IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD;
+ return DAG.getNode(Opcode, DL, VT, {Op0, Op1, Imm});
+}
+
static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
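The std::swap in the FSHR case above is needed because the X86ISD::VSHRD node takes its concatenation operands in the opposite order from ISD::FSHR. A minimal scalar model of the right-shift semantics for 64-bit lanes (illustrative only; fshr64 is not a name from this patch):

  #include <cstdint>

  // fshr(Hi, Lo, Amt): shift the 128-bit concatenation Hi:Lo right by Amt
  // and keep the low 64 bits.
  static inline uint64_t fshr64(uint64_t Hi, uint64_t Lo, unsigned Amt) {
    Amt %= 64; // funnel-shift amounts are taken modulo the element width
    if (Amt == 0)
      return Lo; // avoid the undefined 64-bit shift by 64 below
    return (Lo >> Amt) | (Hi << (64 - Amt));
  }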
@@ -61279,6 +61311,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
+ case ISD::FSHL:
+ case ISD::FSHR: return combineFunnelShift(N, DAG, DCI, Subtarget);
// clang-format on
}