[llvm] 63f1ca1 - [X86] Generalize combineVectorTruncationWithPACKUS/combineVectorTruncationWithPACKSS and reuse in LowerTRUNCATE
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 26 04:40:12 PDT 2023
Author: Simon Pilgrim
Date: 2023-06-26T12:39:43+01:00
New Revision: 63f1ca11a6fe6c24a5793afe0992820495075e54
URL: https://github.com/llvm/llvm-project/commit/63f1ca11a6fe6c24a5793afe0992820495075e54
DIFF: https://github.com/llvm/llvm-project/commit/63f1ca11a6fe6c24a5793afe0992820495075e54.diff
LOG: [X86] Generalize combineVectorTruncationWithPACKUS/combineVectorTruncationWithPACKSS and reuse in LowerTRUNCATE
Rename combineVectorTruncationWithPACK* to truncateVectorWithPACK* and split the operands so LowerTRUNCATE can more easily use them.
Noticed while investigating some regressions in D152928 that are caused by us trying to truncate to PACKUS/PACKSS instructions too early.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d833f85b02268..7adcf4704871d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22815,6 +22815,30 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}
+/// Truncate using ISD::AND mask and X86ISD::PACKUS.
+/// e.g. trunc <8 x i32> X to <8 x i16> -->
+/// MaskX = X & 0xffff (clear high bits to prevent saturation)
+/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
+static SDValue truncateVectorWithPACKUS(EVT DstVT, SDValue In, const SDLoc &DL,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ EVT SrcVT = In.getValueType();
+ APInt Mask = APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
+ DstVT.getScalarSizeInBits());
+ In = DAG.getNode(ISD::AND, DL, SrcVT, In, DAG.getConstant(Mask, DL, SrcVT));
+ return truncateVectorWithPACK(X86ISD::PACKUS, DstVT, In, DL, DAG, Subtarget);
+}
+
+/// Truncate using inreg sign extension and X86ISD::PACKSS.
+static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ EVT SrcVT = In.getValueType();
+ In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SrcVT, In,
+ DAG.getValueType(DstVT));
+ return truncateVectorWithPACK(X86ISD::PACKSS, DstVT, In, DL, DAG, Subtarget);
+}
+
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -23033,16 +23057,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBitcast(MVT::v8i16, res);
}
- if (VT == MVT::v16i8 && InVT == MVT::v16i16) {
- // Use an AND to zero uppper bits for PACKUS.
- In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT));
-
- SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
- DAG.getIntPtrConstant(0, DL));
- SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
- DAG.getIntPtrConstant(8, DL));
- return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
- }
+ if (VT == MVT::v16i8 && InVT == MVT::v16i16)
+ return truncateVectorWithPACKUS(VT, In, DL, Subtarget, DAG);
llvm_unreachable("All 256->128 cases should have been handled above!");
}
@@ -52994,35 +53010,6 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Truncate using ISD::AND mask and X86ISD::PACKUS.
-/// e.g. trunc <8 x i32> X to <8 x i16> -->
-/// MaskX = X & 0xffff (clear high bits to prevent saturation)
-/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
-static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue In = N->getOperand(0);
- EVT InVT = In.getValueType();
- EVT OutVT = N->getValueType(0);
-
- APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
- OutVT.getScalarSizeInBits());
- In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
- return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget);
-}
-
-/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
-static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue In = N->getOperand(0);
- EVT InVT = In.getValueType();
- EVT OutVT = N->getValueType(0);
- In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, InVT, In,
- DAG.getValueType(OutVT));
- return truncateVectorWithPACK(X86ISD::PACKSS, OutVT, In, DL, DAG, Subtarget);
-}
-
/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into
/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
/// legalization the truncation will be translated into a BUILD_VECTOR with each
@@ -53066,9 +53053,9 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
// truncate 2 x v4i32 to v8i16.
if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
- return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG);
+ return truncateVectorWithPACKUS(OutVT, In, DL, Subtarget, DAG);
if (InSVT == MVT::i32)
- return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG);
+ return truncateVectorWithPACKSS(OutVT, In, DL, Subtarget, DAG);
return SDValue();
}
More information about the llvm-commits mailing list