[llvm] [X86] Improve variable 8-bit shifts on AVX512BW (PR #164136)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 3 05:25:53 PST 2025
================
@@ -30968,6 +30968,68 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
}
+ if (VT == MVT::v64i8 && Subtarget.canExtendTo512BW()) {
+ // On AVX512BW, we can use variable 16-bit shifts to implement variable
+ // 8-bit shifts. For this, we split the input into two vectors, RLo and RHi.
+ // The i-th lane of RLo contains the (2*i)-th lane of R, and the i-th lane
+ // of RHi contains the (2*i+1)-th lane of R. After shifting, these vectors
+ // can efficiently be merged together using a masked move.
+ MVT ExtVT = MVT::v32i16;
+
+ SDValue RLo, RHi;
+ // Isolate lower and upper lanes of Amt by masking odd lanes in AmtLo and
+ // right shifting AmtHi.
+ SDValue AmtLo = DAG.getNode(ISD::AND, dl, ExtVT, DAG.getBitcast(ExtVT, Amt),
+ DAG.getConstant(0x00ff, dl, ExtVT));
+ SDValue AmtHi = getTargetVShiftByConstNode(
+ X86ISD::VSRLI, dl, ExtVT, DAG.getBitcast(ExtVT, Amt), 8, DAG);
+ unsigned int ShiftOp;
+ switch (Opc) {
+ case ISD::SHL:
+ // Because we shift left, no bits from the high half can influence the low
+ // half, so we don't need to mask RLo. We do however need to mask RHi, to
+ // prevent high bits of an even lane overflowing into low bits of an odd
+ // lane.
+ RLo = DAG.getBitcast(ExtVT, R);
+ RHi = DAG.getNode(ISD::AND, dl, ExtVT, RLo,
+ DAG.getConstant(0xff00, dl, ExtVT));
+ ShiftOp = X86ISD::VSHLV;
+ break;
+ case ISD::SRL:
+ // Same idea as above, but this time we need to make sure no low bits of
+ // an odd lane can overflow into high bits of an even lane.
+ RHi = DAG.getBitcast(ExtVT, R);
+ RLo = DAG.getNode(ISD::AND, dl, ExtVT, RHi,
+ DAG.getConstant(0x00ff, dl, ExtVT));
+ ShiftOp = X86ISD::VSRLV;
+ break;
+ case ISD::SRA:
+ // For arithmetic right shifts, we want to sign extend each even lane of R
+ // such that the upper half of the corresponding lane of RLo is 0 or -1
+ // depending on the sign bit of the original lane. We do this using 2
+ // immediate shifts.
+ RHi = DAG.getBitcast(ExtVT, R);
+ RLo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, RHi, 8, DAG);
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExtVT, RLo, 8, DAG);
+ ShiftOp = X86ISD::VSRAV;
+ break;
+ default:
+ llvm_unreachable("Unexpected Shift Op");
+ return SDValue();
----------------
RKSimon wrote:
(unnecessary) - drop the return
https://github.com/llvm/llvm-project/pull/164136
More information about the llvm-commits
mailing list