[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 21 01:34:33 PST 2025
================
@@ -489,6 +489,90 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
+void AMDGPUDAGToDAGISel::SelectVectorShuffle(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+
+ // TODO: Handle 16-bit element vectors with even aligned masks.
+ if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) ||
+ VT.getVectorNumElements() != 2) {
+ SelectCode(N);
+ return;
+ }
+
+ auto *SVN = cast<ShuffleVectorSDNode>(N);
+
+ SDValue Src0 = SVN->getOperand(0);
+ SDValue Src1 = SVN->getOperand(1);
+ ArrayRef<int> Mask = SVN->getMask();
+ SDLoc DL(N);
+
+ assert(Src0.getValueType().getVectorNumElements() == 2 && Mask.size() == 2 &&
+ Mask[0] < 4 && Mask[1] < 4);
+
+ SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
+ SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
+ unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
+ unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
+
+ if (Mask[0] < 0) {
+ Src0SubReg = Src1SubReg;
+ MachineSDNode *ImpDef =
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ VSrc0 = SDValue(ImpDef, 0);
+ }
+
+ if (Mask[1] < 0) {
+ Src1SubReg = Src0SubReg;
+ MachineSDNode *ImpDef =
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ VSrc1 = SDValue(ImpDef, 0);
+ }
+
+ // SGPR case needs to lower to copies.
+ //
+ // Also use subregister extract when we can directly blend the registers with
+ // a simple subregister copy.
+ //
+ // TODO: Maybe we should fold this out earlier
+ if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
+ Src1SubReg == AMDGPU::sub0) {
+ // The low element of the result always comes from src0.
+ // The high element of the result always comes from src1.
+ // op_sel selects the high half of src0.
+ // op_sel_hi selects the high half of src1.
+
+ unsigned Src0OpSel =
+ Src0SubReg == AMDGPU::sub1 ? SISrcMods::OP_SEL_0 : SISrcMods::NONE;
+ unsigned Src1OpSel =
+ Src1SubReg == AMDGPU::sub1 ? SISrcMods::OP_SEL_0 : SISrcMods::NONE;
----------------
arsenm wrote:
I'm not sure this is correctly encoded. I'm confused by how op_sel and op_sel_hi are supposed to be represented; we set them as fields in the source modifiers. I guess this should probably be OP_SEL_1?
https://github.com/llvm/llvm-project/pull/123684
More information about the llvm-branch-commits mailing list.