[llvm] [AMDGPU]: Accept constant zero bytes in v_perm OrCombine (PR #66533)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 21:18:43 PST 2024
================
@@ -11610,6 +11610,29 @@ calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex = 0,
return calculateSrcByte(Op->getOperand(0), DestByte, SrcIndex, Depth + 1);
}
+ case ISD::EXTRACT_VECTOR_ELT: {
+ auto IdxOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!IdxOp)
+ return std::nullopt;
+ auto VecIdx = IdxOp->getZExtValue();
+ auto ScalarSize = Op.getScalarValueSizeInBits();
+
+ assert((ScalarSize >= 8) && !(ScalarSize % 8));
+
+ if (ScalarSize < 32) {
+ // TODO: support greater than 32 bit sources
+ if ((VecIdx + 1) * ScalarSize > 32)
+ return std::nullopt;
+
+ SrcIndex = VecIdx * ScalarSize / 8 + SrcIndex;
+ return calculateSrcByte(Op->getOperand(0), DestByte, SrcIndex, Depth + 1);
+ }
+
+ // The scalar is 32 bits, so just use the scalar
+ // TODO: support greater than 32 bit sources
+ return ByteProvider<SDValue>::getSrc(Op, DestByte, SrcIndex);
+ }
+
----------------
arsenm wrote:
Extra blank line: please remove the empty line added after the closing brace of this case.
https://github.com/llvm/llvm-project/pull/66533
More information about the llvm-commits mailing list