[llvm] [AMDGPU] Add new llvm.amdgcn.wave.shuffle intrinsic (PR #167372)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 09:28:47 PST 2025
================
@@ -7269,6 +7269,83 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
return DAG.getBitcast(VT, UnrolledLaneOp);
}
+static SDValue lowerSubgroupShuffle(const SITargetLowering &TLI, SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ unsigned ValSize = VT.getSizeInBits();
+ SDLoc SL(N);
+
+ SDValue Value = N->getOperand(1);
+ SDValue Index = N->getOperand(2);
+
+ // ds_bpermute requires index to be multiplied by 4
+ SDValue ShiftAmount = DAG.getTargetConstant(2, SL, MVT::i32);
+ SDValue ShiftedIndex = DAG.getNode(ISD::SHL, SL, Index.getValueType(), Index,
+ ShiftAmount);
+
+ // Intrinsics will require i32 to operate on
+ SDValue Value32 = Value;
+ if ((ValSize != 32) || (VT.isFloatingPoint()))
+ Value32 = DAG.getBitcast(MVT::i32, Value);
+
+ auto MakeIntrinsic = [&DAG, &SL](unsigned IID, MVT RetVT,
+ SmallVector<SDValue> IntrinArgs) -> SDValue {
+ SmallVector<SDValue> Operands(1);
+ Operands[0] = DAG.getTargetConstant(IID, SL, MVT::i32);
+ Operands.append(IntrinArgs);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, RetVT, Operands);
+ };
+
+ if (TLI.getSubtarget()->supportsWaveWideBPermute()) {
+ // If we can bpermute across the whole wave, then just do that
+ SDValue BPermute = MakeIntrinsic(Intrinsic::amdgcn_ds_bpermute, MVT::i32,
+ {ShiftedIndex, Value32});
+ return DAG.getBitcast(VT, BPermute);
+ } else {
----------------
saxlungs wrote:
Refactored
https://github.com/llvm/llvm-project/pull/167372
More information about the llvm-commits mailing list