[llvm] [GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a (PR #115377)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 07:37:09 PST 2024
================
@@ -7726,3 +7726,65 @@ bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
return true;
}
+
+static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) { // Rewrites Mask in place so each index points at the other half of the [0, 2*NumElems) index space.
+ const unsigned MaskSize = Mask.size();
+ for (unsigned I = 0; I < MaskSize; ++I) {
+ int Idx = Mask[I];
+ if (Idx < 0) // Negative entries are left unchanged (presumably undef lanes -- confirm against the shuffle mask convention).
+ continue;
+
+ if (Idx < (int)NumElems) // Index is below NumElems: shift it up by NumElems.
+ Mask[I] = Idx + NumElems;
+ else // Index is NumElems or above: shift it down by NumElems.
+ Mask[I] = Idx - NumElems;
+ }
+}
+
+bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+
+ auto &Shuffle = cast<GShuffleVector>(MI);
+ // If any of the two inputs is already undef, don't check the mask again to
+ // prevent infinite loop
+ if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
+ return false;
+
+ if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
+ return false;
+
+ ArrayRef<int> Mask = Shuffle.getMask();
+ const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
+
+ const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+
+ bool TouchesSrc1 = false;
+ bool TouchesSrc2 = false;
+ const unsigned NumElems = Mask.size();
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ if (Mask[Idx] < 0)
+ continue;
+
+ if (Mask[Idx] < (int)NumSrcElems)
+ TouchesSrc1 = true;
+ else
+ TouchesSrc2 = true;
+ }
+
+ if (!(TouchesSrc1 ^ TouchesSrc2))
+ return false;
+
+ Register NewSrc1 = Shuffle.getSrc1Reg();
+ SmallVector<int, 16> NewMask(Mask);
+ if (TouchesSrc2) {
+ NewSrc1 = Shuffle.getSrc2Reg();
+ commuteMask(NewMask, NumSrcElems);
+ }
+
+ MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
+ Register Undef = B.buildUndef(Src1Ty).getReg(0);
----------------
davemgreen wrote:
> Maybe we should have a rule that undef is always legal for every type. I can't see why it would be any use to make it illegal.
That might be a nice idea, and I was wondering the same thing for AArch64 recently. At the very least we could treat all "things of legal size" as legal for implicit def, like v2i16, which would not otherwise be legal but is the same size as an i32. We should be able to say here that if a value already exists as the input to a combine, then an implicit def of that type should always exist (or be legalized later). Checking the legality of the implicit def sounds like something that shouldn't even fail.
https://github.com/llvm/llvm-project/pull/115377
More information about the llvm-commits
mailing list