[llvm] [X86] Make ISD::ROTL/ROTR vector rotates legal on XOP+AVX512 targets (PR #184587)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 08:16:36 PST 2026
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/184587
>From 1205ff84544826cb4f9730b9fd1b261ee88700a1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 4 Mar 2026 11:08:46 +0000
Subject: [PATCH 1/2] [X86] Make ISD::ROTL/ROTR vector rotates legal on
XOP+AVX512 targets
Similar to what we did for funnel shifts in #166949: mark vector rotates as legal on XOP (128-bit ROTL only) and AVX512 (vXi32/vXi64 ROTL/ROTR) targets, and fold rotates by a uniform-constant amount to X86ISD::VROTLI/VROTRI in a later DAG combine that runs after DAG legalization.
Widening of 128/256-bit vectors to 512-bit instructions is already fully supported and tested on AVX512F-only targets.
First part of #184002
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 6 ++
llvm/lib/Target/X86/X86ISelLowering.cpp | 75 ++++++++++---------
2 files changed, 44 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 088e6726fea58..6efc4b7ffd670 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2337,6 +2337,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
+
+ // rot*(x, 0) --> x
+ APInt AmtMask = APInt(Known2.getBitWidth(), BitWidth);
+ KnownBits Amt = KnownBits::urem(Known2, KnownBits::makeConstant(AmtMask));
+ if (Amt.isZero())
+ return TLO.CombineTo(Op, Op0);
}
break;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6384c4d58a480..2612befd71b15 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1451,9 +1451,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::ROTL, VT, Custom);
+ for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
+ MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+ setOperationAction(ISD::ROTL, VT, VT.is128BitVector() ? Legal : Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
@@ -2035,6 +2035,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
+ setOperationAction(ISD::ROTL, VT, Legal);
+ setOperationAction(ISD::ROTR, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
}
@@ -2199,8 +2201,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::ROTL, VT, Legal);
+ setOperationAction(ISD::ROTR, VT, Legal);
}
// Custom legalize 2x32 to get a little better code.
@@ -2778,6 +2780,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::STRICT_FP_EXTEND,
ISD::FP_ROUND,
ISD::STRICT_FP_ROUND,
+ ISD::ROTL,
+ ISD::ROTR,
ISD::FSHL,
ISD::FSHR,
ISD::INTRINSIC_VOID,
@@ -31938,20 +31942,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
return R;
- // AVX512 implicitly uses modulo rotation amounts.
- if ((Subtarget.hasVLX() || Subtarget.hasAVX512()) && 32 <= EltSizeInBits) {
- // Attempt to rotate by immediate.
- if (IsCstSplat) {
- unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;
- uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
- return DAG.getNode(RotOpc, DL, VT, R,
- DAG.getTargetConstant(RotAmt, DL, MVT::i8));
- }
-
- // Else, fall-back on VPROLV/VPRORV.
- return Op;
- }
-
// AVX512 VBMI2 vXi16 - lower to funnel shifts.
if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
@@ -31985,24 +31975,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2()))
return splitVectorIntBinary(Op, DAG, DL);
- // XOP has 128-bit vector variable + immediate rotates.
- // +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
- // XOP implicitly uses modulo rotation amounts.
- if (Subtarget.hasXOP()) {
- assert(IsROTL && "Only ROTL expected");
- assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
-
- // Attempt to rotate by immediate.
- if (IsCstSplat) {
- uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
- return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
- DAG.getTargetConstant(RotAmt, DL, MVT::i8));
- }
-
- // Use general rotate by variable (per-element).
- return Op;
- }
-
// Rotate by an uniform constant - expand back to shifts.
// TODO: Can't use generic expansion as UNDEF amt elements can be converted
// to other values when folded to shift amounts, losing the splat.
@@ -58629,6 +58601,33 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Combiner: turn legal vector rotates by a uniform-constant splat into VROTLI/VROTRI (immediate = amount modulo the scalar bit width).
+static SDValue combineRotate(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDLoc DL(N);
+ SDValue Src = N->getOperand(0);
+ SDValue Amt = N->getOperand(1);
+ EVT VT = Src.getValueType();
+
+ // Only combine vector rotates that are legal for this type, and only once
+ // DAG legalization has finished. This avoids converting types that still
+ // need to be widened/promoted, and avoids forming VROTLI/VROTRI before the
+ // generic rotate folds have had a chance to run.
+ if (!VT.isVector() || !DCI.isAfterLegalizeDAG() ||
+ !DAG.getTargetLoweringInfo().isOperationLegal(N->getOpcode(), VT))
+ return SDValue();
+
+ APInt RotateVal;
+ if (!X86::isConstantSplat(Amt, RotateVal))
+ return SDValue();
+
+ bool IsROTR = N->getOpcode() == ISD::ROTR;
+ unsigned Opcode = IsROTR ? X86ISD::VROTRI : X86ISD::VROTLI;
+ uint64_t ModAmt = RotateVal.urem(VT.getScalarSizeInBits());
+ SDValue Imm = DAG.getTargetConstant(ModAmt, DL, MVT::i8);
+ return DAG.getNode(Opcode, DL, VT, {Src, Imm});
+}
+
// Combiner: turn uniform-constant splat funnel shifts into VSHLD/VSHRD
static SDValue combineFunnelShift(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -62380,6 +62379,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget);
+ case ISD::ROTL:
+ case ISD::ROTR: return combineRotate(N, DAG, DCI);
case ISD::AVGCEILS:
case ISD::AVGCEILU:
case ISD::AVGFLOORS:
>From e8fb34b8b97fead56e98b590fb09ab8e889b0779 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 4 Mar 2026 16:16:19 +0000
Subject: [PATCH 2/2] Drop rot*(x, 0) fold
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 ------
1 file changed, 6 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6efc4b7ffd670..088e6726fea58 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2337,12 +2337,6 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
-
- // rot*(x, 0) --> x
- APInt AmtMask = APInt(Known2.getBitWidth(), BitWidth);
- KnownBits Amt = KnownBits::urem(Known2, KnownBits::makeConstant(AmtMask));
- if (Amt.isZero())
- return TLO.CombineTo(Op, Op0);
}
break;
}
More information about the llvm-commits
mailing list