[llvm] X86: add some missing lowerings for shuffles on `bf16` element type. (PR #76076)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 22 19:37:50 PST 2023
================
@@ -13932,28 +13933,30 @@ static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(V1.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
+ assert((V1.getSimpleValueType() == MVT::v8f16 ||
+ V1.getSimpleValueType() == MVT::v8bf16) &&
+ "Bad operand type!");
+ assert(V2.getSimpleValueType() == V2.getSimpleValueType());
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
-
- if (Subtarget.hasFP16()) {
+ if ((V1.getSimpleValueType() == MVT::v8f16 && Subtarget.hasFP16()) ||
+ (V1.getSimpleValueType() == MVT::v8bf16 && Subtarget.hasBF16())) {
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f16, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(
+ DL, V1.getSimpleValueType(), V1, V2, Mask, Subtarget, DAG))
return Broadcast;
}
if (NumV2Elements == 1 && Mask[0] >= 8)
if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v8f16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ DL, V1.getSimpleValueType(), V1, V2, Mask, Zeroable, Subtarget,
+ DAG))
return V;
}
-
- V1 = DAG.getBitcast(MVT::v8i16, V1);
- V2 = DAG.getBitcast(MVT::v8i16, V2);
- return DAG.getBitcast(MVT::v8f16,
- DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, Mask));
+ return DAG.getBitcast(
+ V1.getSimpleValueType(),
+ DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
+ DAG.getBitcast(MVT::v8i16, V2), Mask));
----------------
phoebewang wrote:
I think we can move this out of this function and handle it the same way as `v16bf16`/`v32bf16`. But I'm not sure we really need it; the test below passes without this change: https://godbolt.org/z/Koq16zGeM
https://github.com/llvm/llvm-project/pull/76076
More information about the llvm-commits mailing list