[llvm] [X86] Use GFNI for LZCNT vXi8 ops (PR #141888)
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 31 08:39:04 PDT 2025
https://github.com/houngkoungting updated https://github.com/llvm/llvm-project/pull/141888
From 539e08816f2da3d75560701405c58e47823f8b17 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Wed, 28 May 2025 23:47:45 +0800
Subject: [PATCH 1/2] Add GFNI-based LZCNT lowering for vXi8
---
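Note (not part of the patch): for readers skimming the diff, the new ctlz-gfni.ll test targets vXi8 leading-zero counts of the kind sketched below. This is a hand-written illustration, not copied from the patch; the function name @ctlz_v16i8 is made up, and the exact instruction sequence the lowering emits is defined by the patch and its tests rather than by this note. The intent, per the PR title, is that with +gfni available such IR is lowered using GFNI instructions (e.g. gf2p8affineqb) instead of the generic per-byte expansion.

; Illustrative input only; the real checks live in llvm/test/CodeGen/X86/ctlz-gfni.ll.
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)

define <16 x i8> @ctlz_v16i8(<16 x i8> %a) {
  ; Count leading zeros in each i8 lane; i1 false = defined result for zero inputs.
  %r = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
  ret <16 x i8> %r
}

Running llc on such IR with -mattr=+gfni is expected to exercise the GFNI-based sequence that the added test checks.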
llvm/lib/Target/X86/X86ISelLowering.cpp | 5340 ++++++++++++-----------
llvm/test/CodeGen/X86/ctlz-gfni.ll | 15 +
llvm/test/CodeGen/X86/gfni-lzcnt.ll | 635 ++-
3 files changed, 2996 insertions(+), 2994 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/ctlz-gfni.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 99a82cab384aa..7918a8e72adf6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -170,14 +170,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
static const struct {
const RTLIB::Libcall Op;
- const char * const Name;
+ const char *const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
- { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
- { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
- { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
- { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
- { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
+ {RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall},
+ {RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall},
+ {RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall},
+ {RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall},
+ {RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall},
};
for (const auto &LC : LibraryCalls) {
@@ -210,10 +210,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
- setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8, Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
- setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
- setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -225,106 +225,106 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Integer absolute.
if (Subtarget.canUseCMOV()) {
- setOperationAction(ISD::ABS , MVT::i16 , Custom);
- setOperationAction(ISD::ABS , MVT::i32 , Custom);
+ setOperationAction(ISD::ABS, MVT::i16, Custom);
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(ISD::ABS , MVT::i64 , Custom);
+ setOperationAction(ISD::ABS, MVT::i64, Custom);
}
// Absolute difference.
for (auto Op : {ISD::ABDS, ISD::ABDU}) {
- setOperationAction(Op , MVT::i8 , Custom);
- setOperationAction(Op , MVT::i16 , Custom);
- setOperationAction(Op , MVT::i32 , Custom);
+ setOperationAction(Op, MVT::i8, Custom);
+ setOperationAction(Op, MVT::i16, Custom);
+ setOperationAction(Op, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(Op , MVT::i64 , Custom);
+ setOperationAction(Op, MVT::i64, Custom);
}
// Signed saturation subtraction.
- setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
- setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
- setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i64, Custom);
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
// For slow shld targets we only lower for code size.
LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
- setOperationAction(ShiftOp , MVT::i8 , Custom);
- setOperationAction(ShiftOp , MVT::i16 , Custom);
- setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
+ setOperationAction(ShiftOp, MVT::i8, Custom);
+ setOperationAction(ShiftOp, MVT::i16, Custom);
+ setOperationAction(ShiftOp, MVT::i32, ShiftDoubleAction);
if (Subtarget.is64Bit())
- setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
+ setOperationAction(ShiftOp, MVT::i64, ShiftDoubleAction);
}
if (!Subtarget.useSoftFloat()) {
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
// Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
// SSE has no i16 to fp conversion, only i32. We promote in the handler
// to allow f80 to use i16 and f64 to use i16 with sse1 only
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
// f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
// Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
- setOperationAction(ISD::LRINT, MVT::f32, Custom);
- setOperationAction(ISD::LRINT, MVT::f64, Custom);
- setOperationAction(ISD::LLRINT, MVT::f32, Custom);
- setOperationAction(ISD::LLRINT, MVT::f64, Custom);
+ setOperationAction(ISD::LRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LRINT, MVT::f64, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f64, Custom);
if (!Subtarget.is64Bit()) {
- setOperationAction(ISD::LRINT, MVT::i64, Custom);
+ setOperationAction(ISD::LRINT, MVT::i64, Custom);
setOperationAction(ISD::LLRINT, MVT::i64, Custom);
}
}
@@ -332,7 +332,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasSSE2()) {
// Custom lowering for saturating float to int conversions.
// We handle promotion to larger result types manually.
- for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
+ for (MVT VT : {MVT::i8, MVT::i16, MVT::i32}) {
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
@@ -367,17 +367,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!Subtarget.hasSSE2()) {
- setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
- setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
// Without SSE, i64->f64 goes through memory.
- setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
}
} else if (!Subtarget.is64Bit())
- setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
@@ -389,7 +389,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
@@ -398,47 +398,47 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UREM, VT, Expand);
}
- setOperationAction(ISD::BR_JT , MVT::Other, Expand);
- setOperationAction(ISD::BRCOND , MVT::Other, Custom);
- for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
- MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
- setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128, MVT::i8, MVT::i16,
+ MVT::i32, MVT::i64}) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (Subtarget.is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::FREM , MVT::f32 , Expand);
- setOperationAction(ISD::FREM , MVT::f64 , Expand);
- setOperationAction(ISD::FREM , MVT::f80 , Expand);
- setOperationAction(ISD::FREM , MVT::f128 , Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f80, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
- setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
- setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
- setOperationAction(ISD::GET_FPENV_MEM , MVT::Other, Custom);
- setOperationAction(ISD::SET_FPENV_MEM , MVT::Other, Custom);
- setOperationAction(ISD::RESET_FPENV , MVT::Other, Custom);
+ setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+ setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
+ setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Custom);
+ setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Custom);
+ setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
}
// Promote the i8 variants and force them on up to i32 which has a shorter
// encoding.
- setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
- setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ, MVT::i8, MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8, MVT::i32);
// Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
// a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
// promote that too.
- setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
- setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ, MVT::i16, MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
if (!Subtarget.hasBMI()) {
- setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Legal);
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTTZ, MVT::i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
}
@@ -446,13 +446,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
- setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
- setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
+ setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8, MVT::i32);
} else {
for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
- setOperationAction(ISD::CTLZ , VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
}
}
@@ -497,36 +497,36 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// on the dest that popcntl hasn't had since Cannon Lake.
setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
} else {
- setOperationAction(ISD::CTPOP , MVT::i8 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i16 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i32 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
}
- setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
if (!Subtarget.hasMOVBE())
- setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i16, Expand);
// X86 wants to expand cmov itself.
- for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
+ for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
}
// Custom action for SELECT MMX and expand action for SELECT_CC MMX
setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
- setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
// LLVM/Clang supports zero-cost DWARF and SEH exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
@@ -536,19 +536,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
// Darwin ABI issue.
- for (auto VT : { MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
- setOperationAction(ISD::ConstantPool , VT, Custom);
- setOperationAction(ISD::JumpTable , VT, Custom);
- setOperationAction(ISD::GlobalAddress , VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
- setOperationAction(ISD::ExternalSymbol , VT, Custom);
- setOperationAction(ISD::BlockAddress , VT, Custom);
+ setOperationAction(ISD::ExternalSymbol, VT, Custom);
+ setOperationAction(ISD::BlockAddress, VT, Custom);
}
// 64-bit shl, sra, srl (iff 32-bit x86)
- for (auto VT : { MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SHL_PARTS, VT, Custom);
@@ -557,12 +557,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasSSEPrefetch())
- setOperationAction(ISD::PREFETCH , MVT::Other, Custom);
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Expand certain atomics
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
@@ -606,14 +606,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
bool Is64Bit = Subtarget.is64Bit();
- setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
@@ -623,7 +623,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
- auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
+ auto setF16Action = [&](MVT VT, LegalizeAction Action) {
setOperationAction(ISD::FABS, VT, Action);
setOperationAction(ISD::FNEG, VT, Action);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
@@ -678,7 +678,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// non-optsize case.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
- for (auto VT : { MVT::f32, MVT::f64 }) {
+ for (auto VT : {MVT::f32, MVT::f64}) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
@@ -693,8 +693,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSUB, VT, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , VT, Expand);
- setOperationAction(ISD::FCOS , VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
@@ -757,10 +757,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
- setOperationAction(ISD::FABS , MVT::f32, Custom);
+ setOperationAction(ISD::FABS, MVT::f32, Custom);
// Use XORP to simulate FNEG.
- setOperationAction(ISD::FNEG , MVT::f32, Custom);
+ setOperationAction(ISD::FNEG, MVT::f32, Custom);
if (UseX87)
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
@@ -771,8 +771,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
if (UseX87) {
@@ -787,13 +787,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
addRegisterClass(MVT::f32, &X86::RFP32RegClass);
- for (auto VT : { MVT::f32, MVT::f64 }) {
- setOperationAction(ISD::UNDEF, VT, Expand);
+ for (auto VT : {MVT::f32, MVT::f64}) {
+ setOperationAction(ISD::UNDEF, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
// Always expand sin/cos functions even though x87 has an instruction.
- setOperationAction(ISD::FSIN , VT, Expand);
- setOperationAction(ISD::FCOS , VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
}
@@ -805,7 +805,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat(+1.0f)); // FLD1
addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
- } else // SSE immediates.
+ } else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
}
// Expand FP64 immediates into loads from the stack, save special cases.
@@ -815,7 +815,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- } else // SSE immediates.
+ } else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// Support fp16 0 immediate.
@@ -823,18 +823,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
// Handle constrained floating-point operations of scalar.
- setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
@@ -843,21 +843,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// f80 always uses X87.
if (UseX87) {
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
- setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
- addLegalFPImmediate(TmpFlt); // FLD0
+ addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
- addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
- TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
- &ignored);
- addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.convert(APFloat::x87DoubleExtended(),
+ APFloat::rmNearestTiesToEven, &ignored);
+ addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
- addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
// Always expand sin/cos functions even though x87 has an instruction.
@@ -876,9 +876,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// clang-format on
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
- setOperationAction(ISD::FCEIL, MVT::f80, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
- setOperationAction(ISD::FRINT, MVT::f80, Expand);
+ setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
@@ -888,12 +888,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f80, Custom);
// Handle constrained floating-point operations of scalar.
- setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
- setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FADD, MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f80, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f80, Legal);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
@@ -912,16 +912,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
- setOperationAction(ISD::FADD, MVT::f128, LibCall);
+ setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
- setOperationAction(ISD::FSUB, MVT::f128, LibCall);
+ setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
- setOperationAction(ISD::FDIV, MVT::f128, LibCall);
+ setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
- setOperationAction(ISD::FMUL, MVT::f128, LibCall);
+ setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
- setOperationAction(ISD::FMA, MVT::f128, LibCall);
- setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
+ setOperationAction(ISD::FMA, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
setOperationAction(ISD::FABS, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Custom);
@@ -937,10 +937,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall);
// clang-format on
// No STRICT_FSINCOS
- setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
+ setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
// We need to custom handle any FP_ROUND with an f128 input, but
// LegalizeDAG uses the result type to know when to run a custom handler.
@@ -970,10 +970,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Always use a library call for pow.
- setOperationAction(ISD::FPOW , MVT::f32 , Expand);
- setOperationAction(ISD::FPOW , MVT::f64 , Expand);
- setOperationAction(ISD::FPOW , MVT::f80 , Expand);
- setOperationAction(ISD::FPOW , MVT::f128 , Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f80, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
@@ -985,9 +985,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// Some FP actions are always expanded for vector types.
- for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
- MVT::v4f32, MVT::v8f32, MVT::v16f32,
- MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
+ for (auto VT : {MVT::v8f16, MVT::v16f16, MVT::v32f16, MVT::v4f32, MVT::v8f32,
+ MVT::v16f32, MVT::v2f64, MVT::v4f64, MVT::v8f64}) {
// clang-format off
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
@@ -1013,11 +1012,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
- setOperationAction(ISD::FMA, VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
@@ -1041,7 +1040,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
@@ -1079,30 +1078,30 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
- setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom);
- setOperationAction(ISD::FMINIMUM, MVT::f32, Custom);
- setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Custom);
- setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Custom);
-
- setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
- setOperationAction(ISD::FABS, MVT::v4f32, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Custom);
+
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
- setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
- setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
- setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1122,74 +1121,74 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
- for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
+ for (auto VT : {MVT::f64, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
}
- for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
- MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
+ for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16,
+ MVT::v2i32}) {
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SREM, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UREM, VT, Custom);
}
- setOperationAction(ISD::MUL, MVT::v2i8, Custom);
- setOperationAction(ISD::MUL, MVT::v4i8, Custom);
- setOperationAction(ISD::MUL, MVT::v8i8, Custom);
-
- setOperationAction(ISD::MUL, MVT::v16i8, Custom);
- setOperationAction(ISD::MUL, MVT::v4i32, Custom);
- setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
- setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
- setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
- setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
- setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
- setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
- setOperationAction(ISD::MUL, MVT::v8i16, Legal);
- setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
- setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
-
- setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
- setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
- setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
-
- setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i8, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
+ setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
+ setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
+ setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
+ setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
+
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
- setOperationAction(ISD::FABS, MVT::v2f64, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v2f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
setOperationAction(ISD::LRINT, MVT::v2i32, Custom);
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
- setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
- setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
- setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
@@ -1202,30 +1201,30 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
- setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom);
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
- for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
+ for (auto VT : {MVT::v8f16, MVT::v2f64, MVT::v2i64}) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
if (VT == MVT::v2i64 && !Subtarget.is64Bit())
continue;
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setF16Action(MVT::v8f16, Expand);
@@ -1238,67 +1237,67 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Custom);
// Custom lower v2i64 and v2f64 selects.
- setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
- setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
- setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
- setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
- setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
- setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
// Custom legalize these to avoid over promotion or custom promotion.
for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
}
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
// We want to legalize this to an f64 load rather than an i64 load on
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
// store.
- setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
- setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
- setOperationAction(ISD::STORE, MVT::v4i16, Custom);
- setOperationAction(ISD::STORE, MVT::v8i8, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v8i8, Custom);
// Add 32-bit vector stores to help vectorization opportunities.
- setOperationAction(ISD::STORE, MVT::v2i16, Custom);
- setOperationAction(ISD::STORE, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i8, Custom);
- setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
@@ -1308,41 +1307,42 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v2i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- if (VT == MVT::v2i64) continue;
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ if (VT == MVT::v2i64)
+ continue;
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
}
- setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
}
if (Subtarget.hasGFNI()) {
@@ -1353,73 +1353,73 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
- setOperationAction(ISD::ABS, MVT::v16i8, Legal);
- setOperationAction(ISD::ABS, MVT::v8i16, Legal);
- setOperationAction(ISD::ABS, MVT::v4i32, Legal);
+ setOperationAction(ISD::ABS, MVT::v16i8, Legal);
+ setOperationAction(ISD::ABS, MVT::v8i16, Legal);
+ setOperationAction(ISD::ABS, MVT::v4i32, Legal);
for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
- setOperationAction(ISD::BITREVERSE, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
}
// These might be better off as horizontal vector ops.
- setOperationAction(ISD::ADD, MVT::i16, Custom);
- setOperationAction(ISD::ADD, MVT::i32, Custom);
- setOperationAction(ISD::SUB, MVT::i16, Custom);
- setOperationAction(ISD::SUB, MVT::i32, Custom);
+ setOperationAction(ISD::ADD, MVT::i16, Custom);
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ setOperationAction(ISD::SUB, MVT::i16, Custom);
+ setOperationAction(ISD::SUB, MVT::i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
- setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
- setOperationAction(ISD::FCEIL, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
- setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
- setOperationAction(ISD::FRINT, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
- setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
- setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
- setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
-
- setOperationAction(ISD::FROUND, RoundedTy, Custom);
- }
-
- setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
- setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
- setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
- setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
- setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
- setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
- setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
- setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
-
- setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
- setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
- setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
+
+ setOperationAction(ISD::FROUND, RoundedTy, Custom);
+ }
+
+ setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
+ setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
// FIXME: Do we need to handle scalar-to-vector here?
- setOperationAction(ISD::MUL, MVT::v4i32, Legal);
- setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ setOperationAction(ISD::SMULO, MVT::v2i32, Custom);
// We directly match byte blends in the backend as they match the VSELECT
// condition form.
- setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
// SSE41 brings specific instructions for doing vector sign extend even in
// cases where we don't have SRA.
- for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
- for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
- setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
+ for (auto LoadExtOp : {ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
+ setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
@@ -1428,73 +1428,73 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
      // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
// do the pre and post work in the vector domain.
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
// We need to mark SINT_TO_FP as Custom even though we want to expand it
// so that DAG combine doesn't try to turn it into uint_to_fp.
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
- setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v32i8,
+ MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
// XOP can efficiently perform BITREVERSE with VPPERM.
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64})
setOperationAction(ISD::BITREVERSE, VT, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
bool HasInt256 = Subtarget.hasInt256();
- addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
+ addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
- addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
- addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
- : &X86::VR256RegClass);
-
- for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::STRICT_FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+
+ for (auto VT : {MVT::v8f32, MVT::v4f64}) {
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
- setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
- setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FMAXIMUM, VT, Custom);
- setOperationAction(ISD::FMINIMUM, VT, Custom);
- setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
- setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
}
@@ -1503,81 +1503,82 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
-
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
-
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
// In the customized shift lowering, the legal v8i32/v4i64 cases
// in AVX2 will be recognized.
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::ABDS, VT, Custom);
- setOperationAction(ISD::ABDU, VT, Custom);
- if (VT == MVT::v4i64) continue;
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ABDS, VT, Custom);
+ setOperationAction(ISD::ABDU, VT, Custom);
+ if (VT == MVT::v4i64)
+ continue;
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
}
// These types need custom splitting if their input is a 128-bit vector.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
-
- setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
- setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
- setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
- setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
- setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
- setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
- setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
-
- for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- }
-
- setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v32i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
-
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
+
+ for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+ setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+ }
+
+ setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
+
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1585,64 +1586,64 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- setOperationAction(ISD::SETCC, MVT::v4f64, Custom);
- setOperationAction(ISD::SETCC, MVT::v8f32, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v4f64, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v8f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v8f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v8f32, Custom);
if (Subtarget.hasAnyFMA()) {
- for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
- MVT::v2f64, MVT::v4f64 }) {
+ for (auto VT : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, MVT::v2f64,
+ MVT::v4f64}) {
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
}
}
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
}
- setOperationAction(ISD::MUL, MVT::v4i64, Custom);
- setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::MUL, MVT::v32i8, Custom);
-
- setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
- setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
- setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
- setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
- setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
-
- setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
- setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
-
- setOperationAction(ISD::ABS, MVT::v4i64, Custom);
- setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
- setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
- setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
- setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
-
- setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
- setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
- setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
- setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
-
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
- setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MUL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
+ setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
+
+ setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
+ setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::ABS, MVT::v4i64, Custom);
+ setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
+ setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
+ setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
+ setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
+
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32}) {
+ setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
@@ -1661,41 +1662,41 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
- for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
+ for (auto LoadExtOp : {ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
- setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
- setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
+ setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
}
}
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
- MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
- setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32,
+ MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
+ setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v8f16,
+ MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
}
// Custom lower several nodes for 256-bit types.
- for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
- MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v16f16,
+ MVT::v8f32, MVT::v4f64}) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
}
setF16Action(MVT::v16f16, Expand);
setOperationAction(ISD::FNEG, MVT::v16f16, Custom);
@@ -1713,21 +1714,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
- MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
- setOperationAction(ISD::MGATHER, VT, Custom);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
+ MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64})
+ setOperationAction(ISD::MGATHER, VT, Custom);
}
}
if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
Subtarget.hasF16C()) {
- for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
+ for (MVT VT : {MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16}) {
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
}
- for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {
- setOperationAction(ISD::FP_EXTEND, VT, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
+ for (MVT VT : {MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32}) {
+ setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
}
for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
@@ -1741,28 +1742,28 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// available with AVX512. 512-bit vectors are in a separate block controlled
// by useAVX512Regs.
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
- addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
- addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
- addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
- addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
- addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
+ addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
+ addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
+ addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
+ addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
+ addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
- setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
+ setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
-
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
- setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
- setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
+
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
@@ -1781,29 +1782,29 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+ setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
- for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
- setOperationAction(ISD::VSELECT, VT, Expand);
+ for (auto VT : {MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1})
+ setOperationAction(ISD::VSELECT, VT, Expand);
- for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
+ for (auto VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1}) {
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
- for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
+ for (auto VT : {MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1})
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
@@ -1821,30 +1822,30 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
- addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
- addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
+ addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
+ addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
- addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
+ addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
- setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
+ setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
- setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
- setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
- setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
+ setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
+ setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
+ setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
if (HasBWI)
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
}
- for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
+ for (MVT VT : {MVT::v16f32, MVT::v8f64}) {
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
@@ -1856,93 +1857,93 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasDQI())
setOperationAction(ISD::LLRINT, MVT::v8f64, Legal);
- for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
- setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
+ for (MVT VT : {MVT::v16i1, MVT::v16i8}) {
+ setOperationPromotedToType(ISD::FP_TO_SINT, VT, MVT::v16i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
}
- for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ for (MVT VT : {MVT::v16i16, MVT::v16i32}) {
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
}
- setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
-
- setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
-
- setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
- setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
- setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
- setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
- setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
+
+ setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
if (HasBWI)
- setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
+ setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
// With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
// to 512-bit rather than use the AVX2 instructions so that we can use
// k-masks.
if (!Subtarget.hasVLX()) {
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
- MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
- setOperationAction(ISD::MLOAD, VT, Custom);
+ MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
+ setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
}
}
- setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
- setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
- setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
if (HasBWI) {
// Extends from v64i1 masks to 512-bit vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
- }
-
- for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::STRICT_FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
+ }
+
+ for (auto VT : {MVT::v16f32, MVT::v8f64}) {
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
- setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
- setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
}
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
@@ -1952,36 +1953,36 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
- setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
- setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
- setOperationAction(ISD::MUL, MVT::v8i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
- setOperationAction(ISD::MUL, MVT::v64i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v64i8, Custom);
setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
- setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
- setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
+ setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
+ setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
- setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
- for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::ABDS, VT, Custom);
- setOperationAction(ISD::ABDU, VT, Custom);
- setOperationAction(ISD::BITREVERSE, VT, Custom);
+ for (auto VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::ABDS, VT, Custom);
+ setOperationAction(ISD::ABDU, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1989,82 +1990,83 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- setOperationAction(ISD::SETCC, MVT::v8f64, Custom);
- setOperationAction(ISD::SETCC, MVT::v16f32, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v8f64, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::v16f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v16f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v8f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::v16f32, Custom);
- for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
- setOperationAction(ISD::SMAX, VT, Legal);
- setOperationAction(ISD::UMAX, VT, Legal);
- setOperationAction(ISD::SMIN, VT, Legal);
- setOperationAction(ISD::UMIN, VT, Legal);
- setOperationAction(ISD::ABS, VT, Legal);
- setOperationAction(ISD::CTPOP, VT, Custom);
- }
-
- for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
- setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
- setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
- setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
- setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
- setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
+ for (auto VT : {MVT::v16i32, MVT::v8i64}) {
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ }
+
+ for (auto VT : {MVT::v64i8, MVT::v32i16}) {
+ setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::CTPOP, VT,
+ Subtarget.hasBITALG() ? Legal : Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
}
- setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
- setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
- setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
- setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
- setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
- setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
+ setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
+ setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
+ setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
+ setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
+ setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
if (Subtarget.hasDQI() || Subtarget.hasFP16())
for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
- setOperationAction(Opc, MVT::v8i64, Custom);
+ setOperationAction(Opc, MVT::v8i64, Custom);
if (Subtarget.hasDQI())
- setOperationAction(ISD::MUL, MVT::v8i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i64, Legal);
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
- for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
- setOperationAction(ISD::CTLZ, VT, Legal);
+ for (auto VT : {MVT::v16i32, MVT::v8i64}) {
+ setOperationAction(ISD::CTLZ, VT, Legal);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
- for (auto VT : { MVT::v16i32, MVT::v8i64 })
+ for (auto VT : {MVT::v16i32, MVT::v8i64})
setOperationAction(ISD::CTPOP, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Legal under AVX1.
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
- MVT::v16f16, MVT::v8f32, MVT::v4f64 })
+ for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
+ MVT::v16f16, MVT::v8f32, MVT::v4f64})
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
- for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
- MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ for (auto VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
+ MVT::v32f16, MVT::v16f32, MVT::v8f64}) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
}
setF16Action(MVT::v32f16, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
@@ -2075,20 +2077,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
setOperationAction(ISD::SETCC, MVT::v32f16, Custom);
- for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::MLOAD, VT, Legal);
- setOperationAction(ISD::MSTORE, VT, Legal);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
+ for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64}) {
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
}
if (HasBWI) {
- for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
- setOperationAction(ISD::MLOAD, VT, Legal);
- setOperationAction(ISD::MSTORE, VT, Legal);
+ for (auto VT : {MVT::v64i8, MVT::v32i16}) {
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
}
} else {
setOperationAction(ISD::STORE, MVT::v32i16, Custom);
- setOperationAction(ISD::STORE, MVT::v64i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v64i8, Custom);
}
if (Subtarget.hasVBMI2()) {
@@ -2104,7 +2106,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::v32f16, Custom);
setOperationAction(ISD::FABS, MVT::v32f16, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v32f16, Custom);
- }// useAVX512Regs
+ } // useAVX512Regs
if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
@@ -2121,9 +2123,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
if (Subtarget.hasDQI()) {
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
@@ -2132,31 +2134,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
"Unexpected operation action!");
// v2i64 FP_TO_S/UINT(v2f32) custom conversion.
- setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
}
- for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
+ for (auto VT : {MVT::v2i64, MVT::v4i64}) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
- setOperationAction(ISD::ABS, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
}
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) {
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
}
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
- MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32,
+ MVT::v8f32, MVT::v2f64, MVT::v4f64})
setOperationAction(ISD::MSCATTER, VT, Custom);
if (Subtarget.hasDQI()) {
@@ -2171,13 +2173,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasCDI()) {
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::CTLZ, VT, Legal);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64}) {
+ setOperationAction(ISD::CTLZ, VT, Legal);
}
} // Subtarget.hasCDI()
if (Subtarget.hasVPOPCNTDQ()) {
- for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64})
setOperationAction(ISD::CTPOP, VT, Legal);
}
@@ -2214,32 +2216,32 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  // This block controls legalization of v32i1/v64i1, which are available with
  // AVX512BW.
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
- addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
- addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
+ addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
+ addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
- for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
+ for (auto VT : {MVT::v32i1, MVT::v64i1}) {
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
- for (auto VT : { MVT::v16i1, MVT::v32i1 })
+ for (auto VT : {MVT::v16i1, MVT::v32i1})
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v32i1 masks to 256-bit vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
- for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
- setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
+ for (auto VT : {MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16}) {
+ setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
}
@@ -2248,119 +2250,119 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
if (Subtarget.hasBITALG()) {
- for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
+ for (auto VT : {MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16})
setOperationAction(ISD::CTPOP, VT, Legal);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
- auto setGroup = [&] (MVT VT) {
- setOperationAction(ISD::FADD, VT, Legal);
- setOperationAction(ISD::STRICT_FADD, VT, Legal);
- setOperationAction(ISD::FSUB, VT, Legal);
- setOperationAction(ISD::STRICT_FSUB, VT, Legal);
- setOperationAction(ISD::FMUL, VT, Legal);
- setOperationAction(ISD::STRICT_FMUL, VT, Legal);
- setOperationAction(ISD::FDIV, VT, Legal);
- setOperationAction(ISD::STRICT_FDIV, VT, Legal);
- setOperationAction(ISD::FSQRT, VT, Legal);
- setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
-
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::STRICT_FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ auto setGroup = [&](MVT VT) {
+ setOperationAction(ISD::FADD, VT, Legal);
+ setOperationAction(ISD::STRICT_FADD, VT, Legal);
+ setOperationAction(ISD::FSUB, VT, Legal);
+ setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+ setOperationAction(ISD::FMUL, VT, Legal);
+ setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+ setOperationAction(ISD::FDIV, VT, Legal);
+ setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
- setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
- setOperationAction(ISD::LOAD, VT, Legal);
- setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
- setOperationAction(ISD::FMA, VT, Legal);
- setOperationAction(ISD::STRICT_FMA, VT, Legal);
- setOperationAction(ISD::VSELECT, VT, Legal);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::STRICT_FMA, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
};
// AVX512_FP16 scalar operations
setGroup(MVT::f16);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
- setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
- setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Custom);
- setOperationAction(ISD::FMAXIMUMNUM, MVT::f16, Custom);
- setOperationAction(ISD::FMINIMUMNUM, MVT::f16, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
- setOperationAction(ISD::LRINT, MVT::f16, Legal);
- setOperationAction(ISD::LLRINT, MVT::f16, Legal);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXIMUMNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINIMUMNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::LRINT, MVT::f16, Legal);
+ setOperationAction(ISD::LLRINT, MVT::f16, Legal);
setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
if (Subtarget.useAVX512Regs()) {
setGroup(MVT::v32f16);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
MVT::v32i16);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
MVT::v32i16);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
MVT::v32i16);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
MVT::v32i16);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
- setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v32f16, Custom);
@@ -2375,35 +2377,35 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setGroup(MVT::v8f16);
setGroup(MVT::v16f16);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
// INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
@@ -2411,7 +2413,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
// Need to custom widen these to prevent scalarization.
- setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
setOperationAction(ISD::STORE, MVT::v4f16, Custom);
setOperationAction(ISD::FMINIMUM, MVT::v8f16, Custom);
@@ -2506,52 +2508,52 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
- setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
- setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
- setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
+ setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
- setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
if (Subtarget.hasBWI()) {
- setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
- setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
if (Subtarget.hasFP16()) {
// vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
- setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
// vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
- setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
// vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
- setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
// vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
- setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
}
}
@@ -2573,7 +2575,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
// Add/Sub/Mul with overflow operations are custom lowered.
@@ -2853,8 +2855,9 @@ static bool isLogicOp(unsigned Opcode) {
}
static bool isTargetShuffle(unsigned Opcode) {
- switch(Opcode) {
- default: return false;
+ switch (Opcode) {
+ default:
+ return false;
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
@@ -2895,7 +2898,8 @@ static bool isTargetShuffle(unsigned Opcode) {
static bool isTargetShuffleVariableMask(unsigned Opcode) {
switch (Opcode) {
- default: return false;
+ default:
+ return false;
// Target Shuffles.
case X86ISD::PSHUFB:
case X86ISD::VPERMILPV:
@@ -2921,9 +2925,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
- ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
- -(int64_t)SlotSize,
- false);
+ ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(
+ SlotSize, -(int64_t)SlotSize, false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -2981,7 +2984,7 @@ static bool isX86CCSigned(X86::CondCode X86CC) {
static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
switch (SetCCOpcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
@@ -2993,7 +2996,7 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
case ISD::SETUGT: return X86::COND_A;
case ISD::SETULE: return X86::COND_BE;
case ISD::SETUGE: return X86::COND_AE;
- // clang-format on
+ // clang-format on
}
}
@@ -3031,14 +3034,14 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
- if (ISD::isNON_EXTLoad(LHS.getNode()) &&
- !ISD::isNON_EXTLoad(RHS.getNode())) {
+ if (ISD::isNON_EXTLoad(LHS.getNode()) && !ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
switch (SetCCOpcode) {
- default: break;
+ default:
+ break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
@@ -3054,7 +3057,7 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
switch (SetCCOpcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
@@ -3076,7 +3079,7 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
case ISD::SETO: return X86::COND_NP;
case ISD::SETOEQ:
case ISD::SETUNE: return X86::COND_INVALID;
- // clang-format on
+ // clang-format on
}
}
@@ -3111,7 +3114,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MONone;
Info.offset = 0;
- const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
+ const IntrinsicData *IntrData = getIntrinsicWithChain(Intrinsic);
if (!IntrData) {
switch (Intrinsic) {
case Intrinsic::x86_aesenc128kl:
@@ -3204,7 +3207,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case TRUNCATE_TO_MEM_VI32: {
Info.opc = ISD::INTRINSIC_VOID;
Info.ptrVal = I.getArgOperand(0);
- MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
+ MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
ScalarVT = MVT::i8;
@@ -3224,8 +3227,8 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = nullptr;
MVT DataVT = MVT::getVT(I.getType());
MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
- unsigned NumElts = std::min(DataVT.getVectorNumElements(),
- IndexVT.getVectorNumElements());
+ unsigned NumElts =
+ std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
@@ -3236,8 +3239,8 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = nullptr;
MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
- unsigned NumElts = std::min(DataVT.getVectorNumElements(),
- IndexVT.getVectorNumElements());
+ unsigned NumElts =
+ std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOStore;
@@ -3396,8 +3399,9 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
// Mask vectors support all subregister combinations and operations that
// extract half of vector.
if (ResVT.getVectorElementType() == MVT::i1)
- return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
- (Index == ResVT.getVectorNumElements()));
+ return Index == 0 ||
+ ((ResVT.getSizeInBits() == SrcVT.getSizeInBits() * 2) &&
+ (Index == ResVT.getVectorNumElements()));
return (Index % ResVT.getVectorNumElements()) == 0;
}
@@ -3457,9 +3461,9 @@ bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
(VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
}
-bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
- const SelectionDAG &DAG,
- const MachineMemOperand &MMO) const {
+bool X86TargetLowering::isLoadBitCastBeneficial(
+ EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
BitcastVT.getVectorElementType() == MVT::i1)
return false;
@@ -3468,8 +3472,8 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
return false;
// If both types are legal vectors, it's always ok to convert them.
- if (LoadVT.isVector() && BitcastVT.isVector() &&
- isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
+ if (LoadVT.isVector() && BitcastVT.isVector() && isTypeLegal(LoadVT) &&
+ isTypeLegal(BitcastVT))
return true;
return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
@@ -3493,9 +3497,7 @@ bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
return true;
}
-bool X86TargetLowering::isCtlzFast() const {
- return Subtarget.hasFastLZCNT();
-}
+bool X86TargetLowering::isCtlzFast() const { return Subtarget.hasFastLZCNT(); }
bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
@@ -3917,8 +3919,7 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
return true;
}
-static bool canWidenShuffleElements(ArrayRef<int> Mask,
- const APInt &Zeroable,
+static bool canWidenShuffleElements(ArrayRef<int> Mask, const APInt &Zeroable,
bool V2IsZero,
SmallVectorImpl<int> &WidenedMask) {
// Create an alternative mask with info about zeroable elements.
@@ -4002,7 +4003,7 @@ bool X86::isZeroNode(SDValue Elt) {
static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
const SDLoc &dl, bool IsMask = false) {
- SmallVector<SDValue, 32> Ops;
+ SmallVector<SDValue, 32> Ops;
bool Split = false;
MVT ConstVecVT = VT;
@@ -4016,12 +4017,12 @@ static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0; i < NumElts; ++i) {
bool IsUndef = Values[i] < 0 && IsMask;
- SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
- DAG.getConstant(Values[i], dl, EltVT);
+ SDValue OpNode =
+ IsUndef ? DAG.getUNDEF(EltVT) : DAG.getConstant(Values[i], dl, EltVT);
Ops.push_back(OpNode);
if (Split)
- Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
- DAG.getConstant(0, dl, EltVT));
+ Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT)
+ : DAG.getConstant(0, dl, EltVT));
}
SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
if (Split)
@@ -4029,8 +4030,8 @@ static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
return ConstsNode;
}
-static SDValue getConstVector(ArrayRef<APInt> Bits, const APInt &Undefs,
- MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
+static SDValue getConstVector(ArrayRef<APInt> Bits, const APInt &Undefs, MVT VT,
+ SelectionDAG &DAG, const SDLoc &dl) {
assert(Bits.size() == Undefs.getBitWidth() &&
"Unequal constant and undef arrays");
SmallVector<SDValue, 32> Ops;
@@ -4065,8 +4066,8 @@ static SDValue getConstVector(ArrayRef<APInt> Bits, const APInt &Undefs,
return DAG.getBitcast(VT, ConstsNode);
}
-static SDValue getConstVector(ArrayRef<APInt> Bits, MVT VT,
- SelectionDAG &DAG, const SDLoc &dl) {
+static SDValue getConstVector(ArrayRef<APInt> Bits, MVT VT, SelectionDAG &DAG,
+ const SDLoc &dl) {
APInt Undefs = APInt::getZero(Bits.size());
return getConstVector(Bits, Undefs, VT, DAG, dl);
}
@@ -4165,7 +4166,8 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert((Vec.getValueType().is256BitVector() ||
- Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
+ Vec.getValueType().is512BitVector()) &&
+ "Unexpected vector size!");
return extractSubVector(Vec, IdxVal, DAG, dl, 128);
}
@@ -4189,7 +4191,7 @@ static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
EVT ResultVT = Result.getValueType();
// Insert the relevant vectorWidth bits.
- unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+ unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
@@ -4587,8 +4589,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
// May need to promote to a legal type.
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- DAG.getConstant(0, dl, WideOpVT),
- SubVec, Idx);
+ DAG.getConstant(0, dl, WideOpVT), SubVec, Idx);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
@@ -4603,20 +4604,18 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (IdxVal == 0) {
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
- ZeroIdx);
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- DAG.getConstant(0, dl, WideOpVT),
- SubVec, ZeroIdx);
+ DAG.getConstant(0, dl, WideOpVT), SubVec, ZeroIdx);
Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
- SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- Undef, SubVec, ZeroIdx);
+ SubVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, SubVec, ZeroIdx);
if (Vec.isUndef()) {
assert(IdxVal != 0 && "Unexpected index");
@@ -4654,12 +4653,11 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// isel to optimize when bits are known zero.
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- DAG.getConstant(0, dl, WideOpVT),
- Vec, ZeroIdx);
+ DAG.getConstant(0, dl, WideOpVT), Vec, ZeroIdx);
} else {
// Otherwise use explicit shifts to zero the bits.
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- Undef, Vec, ZeroIdx);
+ Vec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
NumElems = WideOpVT.getVectorNumElements();
SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
@@ -4712,9 +4710,9 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// Isolate the bits after the last inserted bit.
unsigned HighShift = IdxVal + SubVecNumElems;
SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
- DAG.getTargetConstant(HighShift, dl, MVT::i8));
+ DAG.getTargetConstant(HighShift, dl, MVT::i8));
High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,
- DAG.getTargetConstant(HighShift, dl, MVT::i8));
+ DAG.getTargetConstant(HighShift, dl, MVT::i8));
// Now OR all 3 pieces together.
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);
@@ -4795,8 +4793,8 @@ static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
-void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask,
- bool Lo, bool Unary) {
+void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
+ bool Unary) {
assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 &&
"Illegal vector type to unpack");
assert(Mask.empty() && "Expected an empty shuffle mask vector");
@@ -4933,13 +4931,12 @@ static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
/// This produces a shuffle where the low element of V2 is swizzled into the
/// zero/undef vector, landing at element Idx.
/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
-static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
- bool IsZero,
+static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, bool IsZero,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = V2.getSimpleValueType();
- SDValue V1 = IsZero
- ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
+ SDValue V1 =
+ IsZero ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
int NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec(NumElems);
for (int i = 0; i != NumElems; ++i)
@@ -5832,9 +5829,9 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
-static void computeZeroableShuffleElements(ArrayRef<int> Mask,
- SDValue V1, SDValue V2,
- APInt &KnownUndef, APInt &KnownZero) {
+static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, APInt &KnownUndef,
+ APInt &KnownZero) {
int Size = Mask.size();
KnownUndef = KnownZero = APInt::getZero(Size);
@@ -6020,7 +6017,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask,
const APInt &KnownUndef,
const APInt &KnownZero,
- bool ResolveKnownZeros= true) {
+ bool ResolveKnownZeros = true) {
unsigned NumElts = Mask.size();
assert(KnownUndef.getBitWidth() == NumElts &&
KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch");
@@ -6990,8 +6987,8 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, const SDLoc &DL,
MVT EltVT = VT.getVectorElementType();
// Create a new build vector with the first 2 elements followed by undef
// padding, bitcast to v2f64, duplicate, and bitcast back.
- SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
- DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+ SDValue Ops[4] = {Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT),
+ DAG.getUNDEF(EltVT)};
SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
return DAG.getBitcast(VT, Dup);
@@ -7039,7 +7036,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, const SDLoc &DL,
for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
if (Zeroable[EltIdx]) {
// The zero vector will be on the right hand side.
- Mask[EltIdx] = EltIdx+4;
+ Mask[EltIdx] = EltIdx + 4;
continue;
}
@@ -7250,7 +7247,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
APInt ZeroMask = APInt::getZero(NumElems);
APInt UndefMask = APInt::getZero(NumElems);
- SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
+ SmallVector<LoadSDNode *, 8> Loads(NumElems, nullptr);
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
// For each element in the initializer, see if we've found a load, zero or an
@@ -7345,8 +7342,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
auto MMOFlags = LDBase->getMemOperand()->getFlags();
- assert(LDBase->isSimple() &&
- "Cannot merge volatile or atomic loads.");
+ assert(LDBase->isSimple() && "Cannot merge volatile or atomic loads.");
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getBaseAlign(), MMOFlags);
@@ -7434,7 +7430,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
VecVT = MVT::v4f32;
if (TLI.isTypeLegal(VecVT)) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
- SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue Ops[] = {LDBase->getChain(), LDBase->getBasePtr()};
SDValue ResNode = DAG.getMemIntrinsicNode(
X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(),
LDBase->getBaseAlign(), MachineMemOperand::MOLoad);
@@ -8038,9 +8034,9 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Select, Select);
} else {
MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
- SDValue Select = DAG.getSelect(dl, ImmVT, Cond,
- DAG.getAllOnesConstant(dl, ImmVT),
- DAG.getConstant(0, dl, ImmVT));
+ SDValue Select =
+ DAG.getSelect(dl, ImmVT, Cond, DAG.getAllOnesConstant(dl, ImmVT),
+ DAG.getConstant(0, dl, ImmVT));
MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
Select = DAG.getBitcast(VecVT, Select);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select,
@@ -8150,10 +8146,10 @@ static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
// Try to match the following pattern:
// (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- Op0.getOperand(0) == Op1.getOperand(0) &&
- isa<ConstantSDNode>(Op0.getOperand(1)) &&
- isa<ConstantSDNode>(Op1.getOperand(1)));
+ Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0) == Op1.getOperand(0) &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ isa<ConstantSDNode>(Op1.getOperand(1)));
if (!CanFold)
break;
@@ -8233,9 +8229,9 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
unsigned NumElts = VT.getVectorNumElements();
SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
- SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
+ SDValue V0_HI = extract128BitVector(V0, NumElts / 2, DAG, DL);
SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
- SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
+ SDValue V1_HI = extract128BitVector(V1, NumElts / 2, DAG, DL);
MVT NewVT = V0_LO.getSimpleValueType();
SDValue LO = DAG.getUNDEF(NewVT);
@@ -8267,8 +8263,7 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1,
- unsigned &NumExtracts,
- bool &IsSubAdd) {
+ unsigned &NumExtracts, bool &IsSubAdd) {
MVT VT = BV->getSimpleValueType(0);
if (!Subtarget.hasSSE3() || !VT.isFloatingPoint())
@@ -8354,8 +8349,8 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
// Ensure we have found an opcode for both parities and that they are
// different. Don't try to fold this build_vector into an ADDSUB/SUBADD if the
// inputs are undef.
- if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
- InVec0.isUndef() || InVec1.isUndef())
+ if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] || InVec0.isUndef() ||
+ InVec1.isUndef())
return false;
IsSubAdd = Opc[0] == ISD::FADD;
@@ -8368,7 +8363,8 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
/// Returns true if is possible to fold MUL and an idiom that has already been
/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
/// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the
-/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2.
+/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1,
+/// \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
@@ -8392,8 +8388,8 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
- SelectionDAG &DAG,
- SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
+ SelectionDAG &DAG, SDValue &Opnd0,
+ SDValue &Opnd1, SDValue &Opnd2,
unsigned ExpectedUses) {
if (Opnd0.getOpcode() != ISD::FMUL ||
!Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())
@@ -8448,8 +8444,8 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
if (VT.is512BitVector()) {
SmallVector<int> Mask;
for (int I = 0, E = VT.getVectorNumElements(); I != E; I += 2) {
- Mask.push_back(I);
- Mask.push_back(I + E + 1);
+ Mask.push_back(I);
+ Mask.push_back(I + E + 1);
}
SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1);
@@ -8490,13 +8486,13 @@ static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
if (HOpcode == ISD::DELETED_NODE) {
GenericOpcode = Op.getOpcode();
switch (GenericOpcode) {
- // clang-format off
+ // clang-format off
case ISD::ADD: HOpcode = X86ISD::HADD; break;
case ISD::SUB: HOpcode = X86ISD::HSUB; break;
case ISD::FADD: HOpcode = X86ISD::FHADD; break;
case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
default: return false;
- // clang-format on
+ // clang-format on
}
}
@@ -8526,8 +8522,7 @@ static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
// op (extract_vector_elt A, I), (extract_vector_elt A, I+1)
unsigned ExtIndex0 = Op0.getConstantOperandVal(1);
unsigned ExtIndex1 = Op1.getConstantOperandVal(1);
- unsigned ExpectedIndex = i * NumEltsIn128Bits +
- (j % NumEltsIn64Bits) * 2;
+ unsigned ExpectedIndex = i * NumEltsIn128Bits + (j % NumEltsIn64Bits) * 2;
if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
continue;
@@ -9171,8 +9166,8 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
}
-SDValue
-X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
@@ -9396,14 +9391,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
// Is it a vector logical left shift?
- if (NumElems == 2 && Idx == 1 &&
- X86::isZeroNode(Op.getOperand(0)) &&
+ if (NumElems == 2 && Idx == 1 && X86::isZeroNode(Op.getOperand(0)) &&
!X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
- return getVShift(true, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- VT, Op.getOperand(1)),
- NumBits/2, DAG, *this, dl);
+ return getVShift(
+ true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(1)),
+ NumBits / 2, DAG, *this, dl);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
@@ -9416,7 +9410,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
+ return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget,
+ DAG);
}
}
@@ -9455,8 +9450,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// build_vector and broadcast it.
// TODO: We could probably generalize this more.
if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
- SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
- DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+ SDValue Ops[4] = {Op.getOperand(0), Op.getOperand(1), DAG.getUNDEF(EltVT),
+ DAG.getUNDEF(EltVT)};
auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef<SDValue> Ops) {
// Make sure all the even/odd operands match.
for (unsigned i = 2; i != NumElems; ++i)
@@ -9472,8 +9467,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG.getBuildVector(NarrowVT, dl, Ops));
// Broadcast from v2i64/v2f64 and cast to final VT.
MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems / 2);
- return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT,
- NewBV));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, NewBV));
}
}
@@ -9486,7 +9481,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue Lower =
DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2));
SDValue Upper = DAG.getBuildVector(
- HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2));
+ HVT, dl, Op->ops().slice(NumElems / 2, NumElems / 2));
// Recreate the wider vector with the lower and upper part.
return concatSubVectors(Lower, Upper, DAG, dl);
@@ -9497,8 +9492,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (NumNonZero == 1) {
// One half is zero or undef.
unsigned Idx = NonZeroMask.countr_zero();
- SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
- Op.getOperand(Idx));
+ SDValue V2 =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
@@ -9533,30 +9528,28 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
for (unsigned i = 0; i < 2; ++i) {
switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
- default: llvm_unreachable("Unexpected NonZero count");
- case 0:
- Ops[i] = Ops[i*2]; // Must be a zero vector.
- break;
- case 1:
- Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
- break;
- case 2:
- Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
- break;
- case 3:
- Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
- break;
+ default:
+ llvm_unreachable("Unexpected NonZero count");
+ case 0:
+ Ops[i] = Ops[i * 2]; // Must be a zero vector.
+ break;
+ case 1:
+ Ops[i] = getMOVL(DAG, dl, VT, Ops[i * 2 + 1], Ops[i * 2]);
+ break;
+ case 2:
+ Ops[i] = getMOVL(DAG, dl, VT, Ops[i * 2], Ops[i * 2 + 1]);
+ break;
+ case 3:
+ Ops[i] = getUnpackl(DAG, dl, VT, Ops[i * 2], Ops[i * 2 + 1]);
+ break;
}
}
bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
- int MaskVec[] = {
- Reverse1 ? 1 : 0,
- Reverse1 ? 0 : 1,
- static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
- static_cast<int>(Reverse2 ? NumElems : NumElems+1)
- };
+ int MaskVec[] = {Reverse1 ? 1 : 0, Reverse1 ? 0 : 1,
+ static_cast<int>(Reverse2 ? NumElems + 1 : NumElems),
+ static_cast<int>(Reverse2 ? NumElems : NumElems + 1)};
return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec);
}
@@ -9575,7 +9568,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getUNDEF(VT);
for (unsigned i = 1; i < NumElems; ++i) {
- if (Op.getOperand(i).isUndef()) continue;
+ if (Op.getOperand(i).isUndef())
+ continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getVectorIdxConstant(i, dl));
}
@@ -9600,14 +9594,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
// Generate scaled UNPCKL shuffle mask.
SmallVector<int, 16> Mask;
- for(unsigned i = 0; i != Scale; ++i)
+ for (unsigned i = 0; i != Scale; ++i)
Mask.push_back(i);
for (unsigned i = 0; i != Scale; ++i)
- Mask.push_back(NumElems+i);
+ Mask.push_back(NumElems + i);
Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
- Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
+ Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2 * i], Ops[(2 * i) + 1], Mask);
}
return Ops[0];
}
@@ -9620,8 +9614,8 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
- assert((ResVT.is256BitVector() ||
- ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
+ assert((ResVT.is256BitVector() || ResVT.is512BitVector()) &&
+ "Value type must be 256-/512-bit wide");
unsigned NumOperands = Op.getNumOperands();
unsigned NumFreezeUndef = 0;
@@ -9634,15 +9628,14 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
if (SubVec.isUndef())
continue;
if (ISD::isFreezeUndef(SubVec.getNode())) {
- // If the freeze(undef) has multiple uses then we must fold to zero.
- if (SubVec.hasOneUse()) {
- ++NumFreezeUndef;
- } else {
- ++NumZero;
- Undefs.insert(SubVec);
- }
- }
- else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+ // If the freeze(undef) has multiple uses then we must fold to zero.
+ if (SubVec.hasOneUse()) {
+ ++NumFreezeUndef;
+ } else {
+ ++NumZero;
+ Undefs.insert(SubVec);
+ }
+ } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
++NumZero;
else {
assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
@@ -9656,9 +9649,9 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
ArrayRef<SDUse> Ops = Op->ops();
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
- Ops.slice(0, NumOperands/2));
+ Ops.slice(0, NumOperands / 2));
SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
- Ops.slice(NumOperands/2));
+ Ops.slice(NumOperands / 2));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
@@ -9691,7 +9684,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
// TODO: Merge this with LowerAVXCONCAT_VECTORS?
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
- SelectionDAG & DAG) {
+ SelectionDAG &DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
unsigned NumOperands = Op.getNumOperands();
@@ -9764,16 +9757,15 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
DAG.getVectorIdxConstant(NumElems / 2, dl));
}
-static SDValue LowerCONCAT_VECTORS(SDValue Op,
- const X86Subtarget &Subtarget,
+static SDValue LowerCONCAT_VECTORS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() == MVT::i1)
return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG);
assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
- (VT.is512BitVector() && (Op.getNumOperands() == 2 ||
- Op.getNumOperands() == 4)));
+ (VT.is512BitVector() &&
+ (Op.getNumOperands() == 2 || Op.getNumOperands() == 4)));
// AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
@@ -9890,8 +9882,8 @@ static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
// Ok, handle the in-lane shuffles by detecting if and when they repeat.
// Adjust second vector indices to start at LaneSize instead of Size.
- int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
- : Mask[i] % LaneSize + LaneSize;
+ int LocalM =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
if (RepeatedMask[i % LaneSize] < 0)
// This is the first non-undef entry in this slot of a 128-bit lane.
RepeatedMask[i % LaneSize] = LocalM;
@@ -9909,8 +9901,7 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
-static bool
-is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
+static bool is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
SmallVector<int, 32> RepeatedMask;
return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
@@ -10296,8 +10287,8 @@ static SDValue getSHUFPDImmForMask(ArrayRef<int> Mask, const SDLoc &DL,
//
// The function looks for a sub-mask that the nonzero elements are in
// increasing order. If such sub-mask exist. The function returns true.
-static bool isNonZeroElementsInOrder(const APInt &Zeroable,
- ArrayRef<int> Mask, const EVT &VectorType,
+static bool isNonZeroElementsInOrder(const APInt &Zeroable, ArrayRef<int> Mask,
+ const EVT &VectorType,
bool &IsZeroSideLeft) {
int NextElement = -1;
// Check if the Mask's nonzero elements are in increasing order.
@@ -11082,7 +11073,7 @@ static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2,
if (M == SM_SentinelUndef)
continue;
if (M == Elt || (0 <= M && M < NumElts &&
- IsElementEquivalent(NumElts, V1, V1, M, Elt))) {
+ IsElementEquivalent(NumElts, V1, V1, M, Elt))) {
Mask[Elt] = Elt;
LaneV1InUse = true;
continue;
@@ -11215,8 +11206,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// If we have VPTERNLOG, we can use that as a bit blend.
if (Subtarget.hasVLX())
- if (SDValue BitBlend =
- lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
+ if (SDValue BitBlend = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return BitBlend;
// Scale the blend by the number of bytes per element.
@@ -11524,9 +11514,11 @@ static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
/// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
/// permuting the elements of the result in place.
-static SDValue lowerShuffleAsByteRotateAndPermute(
- const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+static SDValue lowerShuffleAsByteRotateAndPermute(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
(VT.is256BitVector() && !Subtarget.hasAVX2()) ||
(VT.is512BitVector() && !Subtarget.hasBWI()))
@@ -11709,15 +11701,15 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
// pre-shuffle first is a better strategy.
if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
// Only prefer immediate blends to unpack/rotate.
- if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
- DAG, true))
+ if (SDValue BlendPerm =
+ lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG, true))
return BlendPerm;
// If either input vector provides only a single element which is repeated
// multiple times, unpacking from both input vectors would generate worse
// code. e.g. for
- // t5: v16i8 = vector_shuffle<16,0,16,1,16,2,16,3,16,4,16,5,16,6,16,7> t2, t4
- // it is better to process t4 first to create a vector of t4[0], then unpack
- // that vector with t2.
+ // t5: v16i8 = vector_shuffle<16,0,16,1,16,2,16,3,16,4,16,5,16,6,16,7> t2,
+ // t4 it is better to process t4 first to create a vector of t4[0], then
+ // unpack that vector with t2.
if (!V1Zero && !V2Zero && !isSingleElementRepeatedMask(V1Mask) &&
!isSingleElementRepeatedMask(V2Mask))
if (SDValue UnpackPerm =
@@ -11727,8 +11719,8 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
DL, VT, V1, V2, Mask, Subtarget, DAG))
return RotatePerm;
// Unpack/rotate failed - try again with variable blends.
- if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
- DAG))
+ if (SDValue BlendPerm =
+ lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
return BlendPerm;
if (VT.getScalarSizeInBits() >= 32)
if (SDValue PermUnpack = lowerShuffleAsPermuteAndUnpack(
@@ -11841,7 +11833,7 @@ static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2,
SDValue Lo, Hi;
for (int i = 0; i < NumElts; ++i) {
int M = Mask[i];
- assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
+ assert((M == SM_SentinelUndef || (0 <= M && M < (2 * NumElts))) &&
"Unexpected mask index.");
if (M < 0)
continue;
@@ -11963,8 +11955,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");
- assert(ByteVT == MVT::v16i8 &&
- "SSE2 rotate lowering only needed for v16i8!");
+ assert(ByteVT == MVT::v16i8 && "SSE2 rotate lowering only needed for v16i8!");
// Default SSE2 implementation
int LoByteShift = 16 - ByteRotation;
@@ -11999,8 +11990,9 @@ static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1,
"Only 32-bit and 64-bit elements are supported!");
// 128/256-bit vectors are only supported with VLX.
- assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))
- && "VLX required for 128/256-bit vectors");
+ assert(
+ (Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) &&
+ "VLX required for 128/256-bit vectors");
SDValue Lo = V1, Hi = V2;
int Rotation = matchShuffleAsElementRotate(Lo, Hi, Mask);
@@ -12552,8 +12544,7 @@ static SDValue lowerShuffleAsSpecificExtension(const SDLoc &DL, MVT VT,
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
int Bits = VT.getSizeInBits();
int NumLanes = Bits / 128;
int NumElements = VT.getVectorNumElements();
@@ -12679,7 +12670,8 @@ static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
// If the bitcasts shift the element size, we can't extract an equivalent
// element from it.
MVT NewVT = V.getSimpleValueType();
- if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
+ if (!NewVT.isVector() ||
+ NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
if (V.getOpcode() == ISD::BUILD_VECTOR ||
@@ -12703,7 +12695,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
ISD::isNON_EXTLoad(peekThroughOneUseBitcasts(V).getNode());
}
-template<typename T>
+template <typename T>
static bool isSoftF16(T VT, const X86Subtarget &Subtarget) {
T EltVT = VT.getScalarType();
return (EltVT == MVT::bf16 && !Subtarget.hasAVX10_2()) ||
@@ -12716,8 +12708,7 @@ static bool isSoftF16(T VT, const X86Subtarget &Subtarget) {
/// across all subtarget feature sets.
static SDValue lowerShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
@@ -12750,8 +12741,8 @@ static SDValue lowerShuffleAsElementInsertion(
// all the smarts here sunk into that routine. However, the current
// lowering of BUILD_VECTOR makes that nearly impossible until the old
// vector shuffle lowering is dead.
- SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
- DAG);
+ SDValue V2S =
+ getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), DAG);
if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
@@ -12954,8 +12945,8 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
// Check that both sources are extracts of the same source vector.
if (N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
N1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- N0.getOperand(0) != N1.getOperand(0) ||
- !N0.hasOneUse() || !N1.hasOneUse())
+ N0.getOperand(0) != N1.getOperand(0) || !N0.hasOneUse() ||
+ !N1.hasOneUse())
return SDValue();
SDValue WideVec = N0.getOperand(0);
@@ -12985,8 +12976,8 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
NewMask.append(NumElts, -1);
// shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0
- SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT),
- NewMask);
+ SDValue Shuf =
+ DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT), NewMask);
// This is free: ymm -> xmm.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf,
DAG.getVectorIdxConstant(0, DL));
@@ -13185,8 +13176,8 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
if (!V.getValueType().isVector()) {
assert(V.getScalarValueSizeInBits() == NumEltBits &&
"Unexpected scalar size");
- MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(),
- VT.getVectorNumElements());
+ MVT BroadcastVT =
+ MVT::getVectorVT(V.getSimpleValueType(), VT.getVectorNumElements());
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
@@ -13211,8 +13202,8 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
// elements are zeroable.
static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2,
unsigned &InsertPSMask,
- const APInt &Zeroable,
- ArrayRef<int> Mask, SelectionDAG &DAG) {
+ const APInt &Zeroable, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
@@ -13664,8 +13655,8 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// when the V2 input is targeting element 0 of the mask -- that is the fast
// case here.
if (NumV2Elements == 1 && Mask[0] >= 4)
- if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v4f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
if (Subtarget.hasSSE41()) {
@@ -13674,8 +13665,8 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
if (!isSingleSHUFPSMask(Mask))
- if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1,
- V2, Mask, DAG))
+ if (SDValue BlendPerm =
+ lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1, V2, Mask, DAG))
return BlendPerm;
}
@@ -13765,8 +13756,8 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v4i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
@@ -13896,7 +13887,7 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
};
if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
- int PSHUFDMask[4] = { -1, -1, -1, -1 };
+ int PSHUFDMask[4] = {-1, -1, -1, -1};
SmallVector<std::pair<int, int>, 4> DWordPairs;
int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);
@@ -14000,7 +13991,8 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
int TripleNonInputIdx =
- TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
+ TripleInputSum -
+ std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
TripleDWord = TripleNonInputIdx / 2;
// We use xor with one to compute the adjacent DWord to whichever one the
@@ -14078,9 +14070,9 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
- if (M >= 0 && M/2 == ADWord)
+ if (M >= 0 && M / 2 == ADWord)
M = 2 * BDWord + M % 2;
- else if (M >= 0 && M/2 == BDWord)
+ else if (M >= 0 && M / 2 == BDWord)
M = 2 * ADWord + M % 2;
// Recurse back into this routine to re-compute state now that this isn't
@@ -14108,33 +14100,33 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
[&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
MutableArrayRef<int> SourceHalfMask,
MutableArrayRef<int> HalfMask, int HalfOffset) {
- if (InPlaceInputs.empty())
- return;
- if (InPlaceInputs.size() == 1) {
- SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
- InPlaceInputs[0] - HalfOffset;
- PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
- return;
- }
- if (IncomingInputs.empty()) {
- // Just fix all of the in place inputs.
- for (int Input : InPlaceInputs) {
- SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
- PSHUFDMask[Input / 2] = Input / 2;
- }
- return;
- }
+ if (InPlaceInputs.empty())
+ return;
+ if (InPlaceInputs.size() == 1) {
+ SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
+ InPlaceInputs[0] - HalfOffset;
+ PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
+ return;
+ }
+ if (IncomingInputs.empty()) {
+ // Just fix all of the in place inputs.
+ for (int Input : InPlaceInputs) {
+ SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
+ PSHUFDMask[Input / 2] = Input / 2;
+ }
+ return;
+ }
- assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
- SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
- InPlaceInputs[0] - HalfOffset;
- // Put the second input next to the first so that they are packed into
- // a dword. We find the adjacent index by toggling the low bit.
- int AdjIndex = InPlaceInputs[0] ^ 1;
- SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
- llvm::replace(HalfMask, InPlaceInputs[1], AdjIndex);
- PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
- };
+ assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
+ SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
+ InPlaceInputs[0] - HalfOffset;
+ // Put the second input next to the first so that they are packed into
+ // a dword. We find the adjacent index by toggling the low bit.
+ int AdjIndex = InPlaceInputs[0] ^ 1;
+ SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
+ llvm::replace(HalfMask, InPlaceInputs[1], AdjIndex);
+ PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
+ };
fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
@@ -14143,10 +14135,12 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
// FIXME: This operation could almost certainly be simplified dramatically to
// look more like the 3-1 fixing operation.
auto moveInputsToRightHalf = [&PSHUFDMask](
- MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
- MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
- MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
- int DestOffset) {
+ MutableArrayRef<int> IncomingInputs,
+ ArrayRef<int> ExistingInputs,
+ MutableArrayRef<int> SourceHalfMask,
+ MutableArrayRef<int> HalfMask,
+ MutableArrayRef<int> FinalSourceHalfMask,
+ int SourceOffset, int DestOffset) {
auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
};
@@ -14342,9 +14336,11 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
/// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
/// blend if only one input is used.
-static SDValue lowerShuffleAsBlendOfPSHUFBs(
- const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
+static SDValue lowerShuffleAsBlendOfPSHUFBs(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ SelectionDAG &DAG, bool &V1InUse,
+ bool &V2InUse) {
assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
"Lane crossing shuffle masks not supported");
@@ -14439,8 +14435,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;
// Try to use bit rotation instructions.
- if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask,
- Subtarget, DAG))
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask, Subtarget, DAG))
return Rotate;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -14475,14 +14471,14 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue V =
+ lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, Zeroable, DAG))
return V;
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
- if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
@@ -14598,8 +14594,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// can both shuffle and set up the inefficient blend.
if (!IsBlendSupported && Subtarget.hasSSSE3()) {
bool V1InUse, V2InUse;
- return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, DAG, V1InUse, V2InUse);
+ return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
+ DAG, V1InUse, V2InUse);
}
// We can always bit-blend if we have to so the fallback strategy is to
@@ -14732,8 +14728,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue V =
+ lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG))
return V;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
@@ -14746,8 +14742,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;
// Try to use bit rotation instructions.
- if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask,
- Subtarget, DAG))
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask, Subtarget, DAG))
return Rotate;
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, V1, V2, Mask, DAG))
@@ -14788,7 +14784,7 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
SmallDenseMap<int, int, 8> LaneMap;
for (int I : InPlaceInputs) {
- PreDupI16Shuffle[I/2] = I/2;
+ PreDupI16Shuffle[I / 2] = I / 2;
LaneMap[I] = I;
}
int j = TargetLo ? 0 : 4, je = j + 4;
@@ -14802,7 +14798,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
++j;
if (j == je)
- // We can't place the inputs into a single half with a simple i16 shuffle, so bail.
+ // We can't place the inputs into a single half with a simple i16
+ // shuffle, so bail.
return SDValue();
// Map this input with the i16 shuffle.
@@ -14923,8 +14920,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Use PALIGNR+Permute if possible - permute might become PSHUFB but the
// PALIGNR will be cheaper than the second PSHUFB+OR.
- if (SDValue V = lowerShuffleAsByteRotateAndPermute(
- DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsByteRotateAndPermute(DL, MVT::v16i8, V1, V2,
+ Mask, Subtarget, DAG))
return V;
}
@@ -14933,8 +14930,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerShuffleAsElementInsertion(
- DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
@@ -15026,8 +15023,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (M >= 0)
M /= 2;
} else {
- // Otherwise just unpack the low half of V into VLoHalf and the high half into
- // VHiHalf so that we can blend them as i16s.
+ // Otherwise just unpack the low half of V into VLoHalf and the high half
+ // into VHiHalf so that we can blend them as i16s.
SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
VLoHalf = DAG.getBitcast(
@@ -15036,8 +15033,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
}
- SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
- SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
+ SDValue LoV =
+ DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
+ SDValue HiV =
+ DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
@@ -15046,9 +15045,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
-static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
+static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (VT == MVT::v8bf16) {
@@ -15219,7 +15217,7 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
- "shuffles as it could then recurse on itself.");
+ "shuffles as it could then recurse on itself.");
int Size = Mask.size();
// If this can be modeled as a broadcast of two elements followed by a blend,
@@ -15558,8 +15556,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
// instruction bytes needed to explicitly generate the zero vector.
// Blends are faster and handle all the non-lane-crossing cases.
- if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
- Subtarget, DAG))
+ if (SDValue Blend =
+ lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Blend;
// If either input operand is a zero vector, use VPERM2X128 because its mask
@@ -15585,8 +15583,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
// Try to use SHUF128 if possible.
if (Subtarget.hasVLX()) {
if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
- unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
- ((WidenedMask[1] % 2) << 1);
+ unsigned PermMask =
+ ((WidenedMask[0] % 2) << 0) | ((WidenedMask[1] % 2) << 1);
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
@@ -15610,7 +15608,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
(WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");
unsigned PermMask = 0;
- PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
+ PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4);
// Check the immediate mask and replace unused sources with undef.
@@ -15802,9 +15800,9 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
/// adjusted to access the extracted halves of the original shuffle operands is
/// returned in HalfMask. HalfIdx1 and HalfIdx2 return whether the upper or
/// lower half of each input operand is accessed.
-static bool
-getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
- int &HalfIdx1, int &HalfIdx2) {
+static bool getHalfShuffleMask(ArrayRef<int> Mask,
+ MutableArrayRef<int> HalfMask, int &HalfIdx1,
+ int &HalfIdx2) {
assert((Mask.size() == HalfMask.size() * 2) &&
"Expected input mask to be twice as long as output");
@@ -15857,7 +15855,8 @@ getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
ArrayRef<int> HalfMask, int HalfIdx1,
int HalfIdx2, bool UndefLower,
- SelectionDAG &DAG, bool UseConcat = false) {
+ SelectionDAG &DAG,
+ bool UseConcat = false) {
assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
assert(V1.getValueType().isSimple() && "Expecting only simple types");
@@ -16219,7 +16218,7 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) &&
"Illegal shuffle mask");
- bool ZeroLane[2] = { true, true };
+ bool ZeroLane[2] = {true, true};
for (int i = 0; i < NumElts; ++i)
ZeroLane[i & 1] &= Zeroable[i];
@@ -16304,9 +16303,9 @@ static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT,
// The VTRUNCs will put 0s in the upper 12 bytes. Use them to put zeroes in
// the upper bits of the result using an unpckldq.
- SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2,
- { 0, 1, 2, 3, 16, 17, 18, 19,
- 4, 5, 6, 7, 20, 21, 22, 23 });
+ SDValue Unpack = DAG.getVectorShuffle(
+ MVT::v16i8, DL, V1, V2,
+ {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23});
// Insert the unpckldq into a zero vector to widen to v32i8.
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8,
DAG.getConstant(0, DL, MVT::v32i8), Unpack,
@@ -16543,8 +16542,8 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Try to use shift instructions if fast.
@@ -16651,8 +16650,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
if (!Subtarget.hasAVX2()) {
@@ -16799,8 +16798,8 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Try to use shift instructions if fast.
@@ -16967,7 +16966,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to produce a fixed cross-128-bit lane permute followed by unpack
// because that should be faster than the variable permute alternatives.
- if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v16i16, V1, V2, Mask, DAG))
+ if (SDValue V =
+ lowerShuffleWithUNPCK256(DL, MVT::v16i16, V1, V2, Mask, DAG))
return V;
// There are no generalized cross-lane shuffle operations available on i16
@@ -16986,8 +16986,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v16 case.
- return lowerV8I16GeneralSingleInputShuffle(
- DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
+ return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v16i16, V1,
+ RepeatedMask, Subtarget, DAG);
}
}
@@ -17006,8 +17006,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
// Try to permute the lanes and then use a per-lane permute.
- if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
- DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v16i16, V1, V2,
+ Mask, DAG, Subtarget))
return V;
// Try to match an interleave of two v16i16s and lower them as unpck and
@@ -17043,8 +17043,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return ZExt;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
@@ -17096,8 +17096,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v32i8, V1, V2, Mask, DAG))
return V;
- if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
- DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v32i8, V1, V2,
+ Mask, DAG, Subtarget))
return V;
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask,
@@ -17119,16 +17119,16 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
// Try to permute the lanes and then use a per-lane permute.
- if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
- DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v32i8, V1, V2,
+ Mask, DAG, Subtarget))
return V;
// Look for {0, 8, 16, 24, 32, 40, 48, 56 } in the first 8 elements. Followed
// by zeroable elements in the remaining 24 elements. Turn this into two
// vmovqb instructions shuffled together.
if (Subtarget.hasVLX())
- if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2,
- Mask, Zeroable, DAG))
+ if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2, Mask,
+ Zeroable, DAG))
return V;
// Try to match an interleave of two v32i8s and lower them as unpck and
@@ -17183,7 +17183,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return V;
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
}
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
@@ -17432,8 +17433,7 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have a single input shuffle with different shuffle patterns in the
// 128-bit lanes and don't lane cross, use variable mask VPERMILPS.
- if (V2.isUndef() &&
- !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) {
+ if (V2.isUndef() && !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) {
SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask);
}
@@ -17700,8 +17700,8 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
- DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v64i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -17778,7 +17778,8 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Subtarget.hasVBMI())
return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG);
- return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
+ return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
}
/// High-level routine to lower various 512-bit x86 vector shuffles.
@@ -17786,13 +17787,11 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine either breaks down the specific type of a 512-bit x86 vector
/// shuffle or splits it into two 256-bit shuffles and fuses the results back
/// together based on the available instructions.
-static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
+static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Subtarget.hasAVX512() &&
- "Cannot lower 512-bit vectors w/ basic ISA!");
+ assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!");
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
@@ -17810,8 +17809,8 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) {
@@ -17823,7 +17822,8 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
}
if (VT == MVT::v32f16 || VT == MVT::v32bf16) {
@@ -17930,14 +17930,12 @@ static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
return -1;
}
-
// Lower vXi1 vector shuffles.
// There is no a dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to SIMD
// vector, shuffle and then truncate it back.
-static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
+static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
@@ -18068,8 +18066,8 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumElems = VT.getVectorNumElements();
if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
(Subtarget.hasDQI() && (NumElems < 32)))
- return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT),
- Shuffle, ISD::SETGT);
+ return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), Shuffle,
+ ISD::SETGT);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
}
@@ -18196,7 +18194,7 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget);
- /// Top-level lowering for x86 vector shuffles.
+/// Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
/// vector shuffles. Most of the specific lowering strategies are encapsulated
@@ -18272,8 +18270,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
// by obfuscating the operands with bitcasts.
// TODO: Avoid lowering directly from this top-level function: make this
// a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask,
- Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask, Subtarget, DAG))
return Broadcast;
MVT NewEltVT = VT.isFloatingPoint()
@@ -18496,8 +18494,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
// Build a mask by testing the condition against zero.
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
- DAG.getConstant(0, dl, CondVT),
- ISD::SETNE);
+ DAG.getConstant(0, dl, CondVT), ISD::SETNE);
// Now return a new VSELECT using the mask.
return DAG.getSelect(dl, VT, Mask, LHS, RHS);
}
@@ -18602,7 +18599,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
}
if (VT == MVT::i32 || VT == MVT::i64)
- return Op;
+ return Op;
return SDValue();
}
@@ -18615,7 +18612,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Vec);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
- auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
+ auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
MVT EltVT = Op.getSimpleValueType();
assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
@@ -18630,7 +18627,8 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
if (NumElts == 1) {
Vec = widenMaskVector(Vec, false, Subtarget, DAG, dl);
MVT IntVT = MVT::getIntegerVT(Vec.getValueType().getVectorNumElements());
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, DAG.getBitcast(IntVT, Vec));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getBitcast(IntVT, Vec));
}
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
@@ -18688,14 +18686,13 @@ static APInt getExtractedDemandedElts(SDNode *N) {
return DemandedElts;
}
-SDValue
-X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
- auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
+ auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
if (VecVT.getVectorElementType() == MVT::i1)
return ExtractBitFromMaskVector(Op, DAG, Subtarget);
@@ -18726,10 +18723,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// | | Ports pressure in cycles | |
// |Uops| 1 | 2 - D |3 - D | 4 | 5 | |
// ---------------------------------------------------------
- // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0
- // |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18]
- // |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1]
- // Total Num Of Uops: 4
+ // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18],
+ // xmm0 |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18] |1
+ // | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1] Total Num
+ // Of Uops: 4
return SDValue();
}
@@ -18834,7 +18831,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
- int Mask[2] = { 1, -1 };
+ int Mask[2] = {1, -1};
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getVectorIdxConstant(0, dl));
@@ -18859,9 +18856,10 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VecVT.getVectorNumElements();
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
- SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
+ SDValue ExtOp =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
+ DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
+ DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
}
@@ -18888,9 +18886,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if (EltVT == MVT::bf16) {
MVT IVT = VT.changeVectorElementTypeToInteger();
- SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVT,
- DAG.getBitcast(IVT, N0),
- DAG.getBitcast(MVT::i16, N1), N2);
+ SDValue Res =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVT, DAG.getBitcast(IVT, N0),
+ DAG.getBitcast(MVT::i16, N1), N2);
return DAG.getBitcast(VT, Res);
}
@@ -19151,8 +19149,9 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
}
// Returns the appropriate wrapper opcode for a global reference.
-unsigned X86TargetLowering::getGlobalWrapperKind(
- const GlobalValue *GV, const unsigned char OpFlags) const {
+unsigned
+X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV,
+ const unsigned char OpFlags) const {
// References to absolute symbols are never PC-relative.
if (GV && GV->isAbsoluteSymbolRef())
return X86ISD::Wrapper;
@@ -19176,8 +19175,8 @@ unsigned X86TargetLowering::getGlobalWrapperKind(
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
-SDValue
-X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
@@ -19227,11 +19226,10 @@ SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
}
-SDValue
-X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
- unsigned char OpFlags =
- Subtarget.classifyBlockAddressReference();
+ unsigned char OpFlags = Subtarget.classifyBlockAddressReference();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
@@ -19336,8 +19334,8 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
return Result;
}
-SDValue
-X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
}
@@ -19415,24 +19413,24 @@ static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
-static SDValue
-LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT) {
return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD,
/*LoadGlobalBaseReg=*/true);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
-static SDValue
-LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT) {
return GetTLSADDR(DAG, GA, PtrVT, X86::RAX, X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
-static SDValue
-LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT) {
return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
@@ -19464,9 +19462,8 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
// Build x@dtpoff.
unsigned char OperandFlags = X86II::MO_DTPOFF;
unsigned WrapperKind = X86ISD::Wrapper;
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(), OperandFlags);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
// Add x@dtpoff with the base.
@@ -19507,9 +19504,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x at ntpoff,%eax" (local exec)
// or "addl x at indntpoff,%eax" (initial exec)
// or "addl x at gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
- SDValue TGA =
- DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
- GA->getOffset(), OperandFlags);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
@@ -19528,8 +19524,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
-SDValue
-X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
@@ -19543,20 +19539,20 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
- case TLSModel::GeneralDynamic:
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTarget64BitLP64())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
- return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
- }
- return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
- case TLSModel::LocalDynamic:
- return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
- Subtarget.isTarget64BitLP64());
- case TLSModel::InitialExec:
- case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
- PositionIndependent);
+ case TLSModel::GeneralDynamic:
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTarget64BitLP64())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
+ return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
+ }
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
+ case TLSModel::LocalDynamic:
+ return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
+ Subtarget.isTarget64BitLP64());
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
+ PositionIndependent);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -19577,9 +19573,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
WrapperKind = X86ISD::WrapperRIP;
}
SDLoc DL(Op);
- SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
- GA->getValueType(0),
- GA->getOffset(), OpFlag);
+ SDValue Result = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC32, the address is actually $g + Offset.
@@ -19593,7 +19588,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
- SDValue Args[] = { Chain, Offset };
+ SDValue Args[] = {Chain, Offset};
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), DL);
@@ -19661,9 +19656,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo());
// Get the offset of start of .tls section
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(), X86II::MO_SECREL);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
+ GA->getOffset(), X86II::MO_SECREL);
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
// The address of the thread local variable is the add of the thread
@@ -19723,8 +19718,8 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, const SDLoc &dl,
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
- if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
- (VT != MVT::f32 && VT != MVT::f64))
+ if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
+ (VT != MVT::f32 && VT != MVT::f64))
return SDValue();
// Pack the i64 into a vector, do the operation and extract.
@@ -19789,22 +19784,22 @@ static SDValue LowerI64IntToFP16(SDValue Op, const SDLoc &dl, SelectionDAG &DAG,
static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
const X86Subtarget &Subtarget) {
switch (Opcode) {
- case ISD::SINT_TO_FP:
- // TODO: Handle wider types with AVX/AVX512.
- if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32)
- return false;
- // CVTDQ2PS or (V)CVTDQ2PD
- return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64);
-
- case ISD::UINT_TO_FP:
- // TODO: Handle wider types and i64 elements.
- if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32)
- return false;
- // VCVTUDQ2PS or VCVTUDQ2PD
- return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
+ case ISD::SINT_TO_FP:
+ // TODO: Handle wider types with AVX/AVX512.
+ if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32)
+ return false;
+ // CVTDQ2PS or (V)CVTDQ2PD
+ return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64);
- default:
+ case ISD::UINT_TO_FP:
+ // TODO: Handle wider types and i64 elements.
+ if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32)
return false;
+ // VCVTUDQ2PS or VCVTUDQ2PD
+ return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
+
+ default:
+ return false;
}
}
@@ -19948,7 +19943,7 @@ static SDValue lowerINT_TO_FP_vXi64(SDValue Op, const SDLoc &DL,
return SDValue();
SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64);
- SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
+ SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64,
DAG.getNode(ISD::SRL, DL, MVT::v4i64, Src, One),
DAG.getNode(ISD::AND, DL, MVT::v4i64, Src, One));
@@ -20166,7 +20161,7 @@ std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(
Chain = Result.getValue(1);
}
- return { Result, Chain };
+ return {Result, Chain};
}
/// Horizontal vector math instructions may be slower than normal math with
@@ -20203,18 +20198,18 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, const SDLoc &dl,
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
- static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
+ static const uint32_t CV0[] = {0x43300000, 0x45300000, 0, 0};
Constant *C0 = ConstantDataVector::get(*Context, CV0);
auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, Align(16));
- SmallVector<Constant*,2> CV1;
+ SmallVector<Constant *, 2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
- APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
+ APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
- APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
+ APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, Align(16));
@@ -20235,11 +20230,10 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, const SDLoc &dl,
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
- if (Subtarget.hasSSE3() &&
- shouldUseHorizontalOp(true, DAG, Subtarget)) {
+ if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
- SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
+ SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1, -1});
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
@@ -20265,8 +20259,7 @@ static SDValue LowerUINT_TO_FP_i32(SDValue Op, const SDLoc &dl,
// Or the load with the bias.
SDValue Or = DAG.getNode(
- ISD::OR, dl, MVT::v2i64,
- DAG.getBitcast(MVT::v2i64, Load),
+ ISD::OR, dl, MVT::v2i64, DAG.getBitcast(MVT::v2i64, Load),
DAG.getBitcast(MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
@@ -20464,8 +20457,9 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
- Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
- VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
+ Low =
+ DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, VecCstLowBitcast,
+ DAG.getTargetConstant(0xaa, DL, MVT::i8));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
@@ -20473,7 +20467,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
- VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
+ VecCstHighBitcast,
+ DAG.getTargetConstant(0xaa, DL, MVT::i8));
} else {
SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
@@ -20509,7 +20504,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
-static SDValue lowerUINT_TO_FP_vec(SDValue Op, const SDLoc &dl, SelectionDAG &DAG,
+static SDValue lowerUINT_TO_FP_vec(SDValue Op, const SDLoc &dl,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDValue N0 = Op.getOperand(OpNo);
@@ -20720,8 +20716,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
DstTy = MVT::i64;
}
- assert(DstTy.getSimpleVT() <= MVT::i64 &&
- DstTy.getSimpleVT() >= MVT::i16 &&
+ assert(DstTy.getSimpleVT() <= MVT::i64 && DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");
// We lower FP->int64 into FISTP64 followed by a load from a temporary
@@ -20759,8 +20754,8 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool LosesInfo = false;
if (TheVT == MVT::f64)
// The rounding mode is irrelevant as the conversion should be exact.
- Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
- &LosesInfo);
+ Status = Thresh.convert(APFloat::IEEEdouble(),
+ APFloat::rmNearestTiesToEven, &LosesInfo);
else if (TheVT == MVT::f80)
Status = Thresh.convert(APFloat::x87DoubleExtended(),
APFloat::rmNearestTiesToEven, &LosesInfo);
@@ -20770,8 +20765,8 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
- EVT ResVT = getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), TheVT);
+ EVT ResVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TheVT);
SDValue Cmp;
if (IsStrict) {
Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE, Chain,
@@ -20800,8 +20795,8 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
DAG.getConstantFP(0.0, DL, TheVT));
if (IsStrict) {
- Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other},
- { Chain, Value, FltOfs });
+ Value = DAG.getNode(ISD::STRICT_FSUB, DL, {TheVT, MVT::Other},
+ {Chain, Value, FltOfs});
Chain = Value.getValue(1);
} else
Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs);
@@ -20815,7 +20810,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
- SDValue Ops[] = { Chain, StackSlot };
+ SDValue Ops[] = {Chain, StackSlot};
unsigned FLDSize = TheVT.getStoreSize();
assert(FLDSize <= MemSize && "Stack slot not big enough");
@@ -20828,10 +20823,9 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// Build the FP_TO_INT*_IN_MEM
MachineMemOperand *MMO = MF.getMachineMemOperand(
MPI, MachineMemOperand::MOStore, MemSize, Align(MemSize));
- SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
- DAG.getVTList(MVT::Other),
- Ops, DstTy, MMO);
+ SDValue Ops[] = {Chain, Value, StackSlot};
+ SDValue FIST = DAG.getMemIntrinsicNode(
+ X86ISD::FP_TO_INT_IN_MEM, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO);
SDValue Res = DAG.getLoad(Op.getValueType(), DL, FIST, StackSlot, MPI);
Chain = Res.getValue(1);
@@ -21010,7 +21004,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
return In;
unsigned NumElems = SrcVT.getVectorNumElements();
- if (NumElems < 2 || !isPowerOf2_32(NumElems) )
+ if (NumElems < 2 || !isPowerOf2_32(NumElems))
return SDValue();
unsigned DstSizeInBits = DstVT.getSizeInBits();
@@ -21081,7 +21075,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
// Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
SmallVector<int, 64> Mask;
int Scale = 64 / OutVT.getScalarSizeInBits();
- narrowShuffleMaskElts(Scale, { 0, 2, 1, 3 }, Mask);
+ narrowShuffleMaskElts(Scale, {0, 2, 1, 3}, Mask);
Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
if (DstVT.is256BitVector())
@@ -21325,14 +21319,12 @@ static SDValue LowerTruncateVecI1(SDValue Op, const SDLoc &DL,
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
// We need to shift to get the lsb into sign position.
// Shift packed bytes not supported natively, bitcast to word
- MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
- In = DAG.getNode(ISD::SHL, DL, ExtVT,
- DAG.getBitcast(ExtVT, In),
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits() / 16);
+ In = DAG.getNode(ISD::SHL, DL, ExtVT, DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
In = DAG.getBitcast(InVT, In);
}
- return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
- In, ISD::SETGT);
+ return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
}
// Use TESTD/Q, extended vector to packed dword/qword.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
@@ -21370,7 +21362,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, const SDLoc &DL,
// We either have 8 elements or we're allowed to use 512-bit vectors.
// If we have VLX, we want to use the narrowest vector that can get the
// job done so we use vXi32.
- MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts);
+ MVT EltVT =
+ Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512 / NumElts);
MVT ExtVT = MVT::getVectorVT(EltVT, NumElts);
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
@@ -21484,10 +21477,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
// The PSHUFB mask:
- static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
- -1, -1, -1, -1, -1, -1, -1, -1,
- 16, 17, 20, 21, 24, 25, 28, 29,
- -1, -1, -1, -1, -1, -1, -1, -1 };
+ static const int ShufMask1[] = {
+ 0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1,
+ 16, 17, 20, 21, 24, 25, 28, 29, -1, -1, -1, -1, -1, -1, -1, -1};
In = DAG.getBitcast(MVT::v32i8, In);
In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
@@ -21665,8 +21657,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
dl, {NVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
- Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
- NVT, Src);
+ Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, NVT,
+ Src);
}
// TODO: Need to add exception check code for strict FP.
@@ -21768,8 +21760,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
if (IsStrict) {
- unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI
- : X86ISD::STRICT_CVTTP2UI;
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});
}
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
@@ -21894,7 +21886,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain);
if (IsStrict)
- return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
return Tmp.first;
}
@@ -21957,7 +21949,7 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
- SDValue Ops[] = { Chain, StackPtr };
+ SDValue Ops[] = {Chain, StackPtr};
Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
/*Align*/ std::nullopt,
@@ -21965,7 +21957,7 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
Chain = Src.getValue(1);
}
- SDValue StoreOps[] = { Chain, Src, StackPtr };
+ SDValue StoreOps[] = {Chain, Src, StackPtr};
Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other),
StoreOps, DstVT, MPI, /*Align*/ std::nullopt,
MachineMemOperand::MOStore);
@@ -21973,8 +21965,8 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
}
-SDValue
-X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
+ SelectionDAG &DAG) const {
// This is based on the TargetLowering::expandFP_TO_INT_SAT implementation,
// but making use of X86 specifics to produce better instruction sequences.
SDNode *Node = Op.getNode();
@@ -22036,12 +22028,12 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
APFloat MinFloat(Sem);
APFloat MaxFloat(Sem);
- APFloat::opStatus MinStatus = MinFloat.convertFromAPInt(
- MinInt, IsSigned, APFloat::rmTowardZero);
- APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt(
- MaxInt, IsSigned, APFloat::rmTowardZero);
- bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact)
- && !(MaxStatus & APFloat::opStatus::opInexact);
+ APFloat::opStatus MinStatus =
+ MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+ APFloat::opStatus MaxStatus =
+ MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+ !(MaxStatus & APFloat::opStatus::opInexact);
SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
@@ -22051,11 +22043,11 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
if (AreExactFloatBounds) {
if (DstVT != TmpVT) {
// Clamp by MinFloat from below. If Src is NaN, propagate NaN.
- SDValue MinClamped = DAG.getNode(
- X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
+ SDValue MinClamped =
+ DAG.getNode(X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
// Clamp by MaxFloat from above. If Src is NaN, propagate NaN.
- SDValue BothClamped = DAG.getNode(
- X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
+ SDValue BothClamped =
+ DAG.getNode(X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);
@@ -22065,11 +22057,11 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
}
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
- SDValue MinClamped = DAG.getNode(
- X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
+ SDValue MinClamped =
+ DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
// Clamp by MaxFloat from above. NaN cannot occur.
- SDValue BothClamped = DAG.getNode(
- X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
+ SDValue BothClamped =
+ DAG.getNode(X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
@@ -22081,8 +22073,8 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
// Otherwise, select zero if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
- return DAG.getSelectCC(
- dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+ return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
+ ISD::CondCode::SETUO);
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
@@ -22104,13 +22096,13 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) {
// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.
- Select = DAG.getSelectCC(
- dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT);
+ Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
+ ISD::CondCode::SETULT);
}
// If Src OGT MaxFloat, select MaxInt.
- Select = DAG.getSelectCC(
- dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT);
+ Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
+ ISD::CondCode::SETOGT);
// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is already zero. The promoted case was already handled above.
@@ -22120,8 +22112,7 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
- return DAG.getSelectCC(
- dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
+ return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
@@ -22177,15 +22168,15 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
Entry.IsZExt = true;
Args.push_back(Entry);
- SDValue Callee = DAG.getExternalSymbol(
- getLibcallName(RTLIB::FPEXT_F16_F32),
- getPointerTy(DAG.getDataLayout()));
+ SDValue Callee =
+ DAG.getExternalSymbol(getLibcallName(RTLIB::FPEXT_F16_F32),
+ getPointerTy(DAG.getDataLayout()));
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
CallingConv::C, EVT(VT).getTypeForEVT(*DAG.getContext()), Callee,
std::move(Args));
SDValue Res;
- std::tie(Res,Chain) = LowerCallTo(CLI);
+ std::tie(Res, Chain) = LowerCallTo(CLI);
if (IsStrict)
Res = DAG.getMergeValues({Res, Chain}, DL);
@@ -22453,14 +22444,14 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, const SDLoc &DL,
// TODO: Allow commuted (f)sub by negating the result of (F)HSUB?
unsigned HOpcode;
switch (Op.getOpcode()) {
- // clang-format off
+ // clang-format off
case ISD::ADD: HOpcode = X86ISD::HADD; break;
case ISD::SUB: HOpcode = X86ISD::HSUB; break;
case ISD::FADD: HOpcode = X86ISD::FHADD; break;
case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
default:
llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
- // clang-format on
+ // clang-format on
}
unsigned LExtIndex = LHS.getConstantOperandVal(1);
unsigned RExtIndex = RHS.getConstantOperandVal(1);
@@ -22518,7 +22509,7 @@ static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) {
bool Ignored;
APFloat Point5Pred = APFloat(0.5f);
Point5Pred.convert(Sem, APFloat::rmNearestTiesToEven, &Ignored);
- Point5Pred.next(/*nextDown*/true);
+ Point5Pred.next(/*nextDown*/ true);
SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT,
DAG.getConstantFP(Point5Pred, dl, VT), N0);
@@ -22568,16 +22559,16 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
unsigned EltBits = VT.getScalarSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
- APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
- APInt::getSignMask(EltBits);
+ APInt MaskElt =
+ IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
const fltSemantics &Sem = VT.getFltSemantics();
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
- unsigned LogicOp = IsFABS ? X86ISD::FAND :
- IsFNABS ? X86ISD::FOR :
- X86ISD::FXOR;
+ unsigned LogicOp = IsFABS ? X86ISD::FAND
+ : IsFNABS ? X86ISD::FOR
+ : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (VT.isVector() || IsF128)
@@ -22680,7 +22671,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
}
/// Helper for attempting to create a X86ISD::BT node.
-static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL, SelectionDAG &DAG) {
+static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL,
+ SelectionDAG &DAG) {
// If Src is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
@@ -23285,8 +23277,7 @@ static bool hasNonFlagsUse(SDValue Op) {
// the node alone and emit a 'cmp' or 'test' instruction.
static bool isProfitableToUseFlagOp(SDValue Op) {
for (SDNode *U : Op->users())
- if (U->getOpcode() != ISD::CopyToReg &&
- U->getOpcode() != ISD::SETCC &&
+ if (U->getOpcode() != ISD::CopyToReg && U->getOpcode() != ISD::SETCC &&
U->getOpcode() != ISD::STORE)
return false;
@@ -23302,14 +23293,20 @@ static SDValue EmitTest(SDValue Op, X86::CondCode X86CC, const SDLoc &dl,
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
- default: break;
- case X86::COND_A: case X86::COND_AE:
- case X86::COND_B: case X86::COND_BE:
+ default:
+ break;
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_B:
+ case X86::COND_BE:
NeedCF = true;
break;
- case X86::COND_G: case X86::COND_GE:
- case X86::COND_L: case X86::COND_LE:
- case X86::COND_O: case X86::COND_NO: {
+ case X86::COND_G:
+ case X86::COND_GE:
+ case X86::COND_L:
+ case X86::COND_LE:
+ case X86::COND_O:
+ case X86::COND_NO: {
// Check if we really need to set the
// Overflow flag. If NoSignedWrap is present
// that is not actually needed.
@@ -23361,14 +23358,14 @@ static SDValue EmitTest(SDValue Op, X86::CondCode X86CC, const SDLoc &dl,
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("unexpected operator!");
case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: Opcode = X86ISD::OR; break;
- // clang-format on
+ // clang-format on
}
NumOperands = 2;
@@ -23383,8 +23380,9 @@ static SDValue EmitTest(SDValue Op, X86::CondCode X86CC, const SDLoc &dl,
case ISD::USUBO: {
// /USUBO/SSUBO will become a X86ISD::SUB and we can use its Z flag.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0),
- Op->getOperand(1)).getValue(1);
+ return DAG
+ .getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0), Op->getOperand(1))
+ .getValue(1);
}
default:
break;
@@ -23413,8 +23411,9 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
EVT CmpVT = Op0.getValueType();
- assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
- CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
+ assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 || CmpVT == MVT::i32 ||
+ CmpVT == MVT::i64) &&
+ "Unexpected VT!");
// Only promote the compare up to I32 if it is a 16 bit operation
// with an immediate. 16 bit immediates are to be avoided unless the target
@@ -23532,9 +23531,8 @@ bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
- SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
+SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
SDLoc DL(Op);
@@ -23641,9 +23639,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
-unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
- return 2;
-}
+unsigned X86TargetLowering::combineRepeatedFPDivisors() const { return 2; }
SDValue
X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
@@ -23651,7 +23647,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
- return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue(N, 0); // Lower SDIV as SDIV
assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
"Unexpected divisor!");
@@ -23720,8 +23716,8 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
isPowerOf2_64(AndRHSVal)) {
Src = AndLHS;
- BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
- Src.getValueType());
+ BitNo =
+ DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, Src.getValueType());
}
}
}
@@ -23767,7 +23763,7 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
@@ -23789,7 +23785,7 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
case ISD::SETO: SSECC = 7; break;
case ISD::SETUEQ: SSECC = 8; break;
case ISD::SETONE: SSECC = 12; break;
- // clang-format on
+ // clang-format on
}
if (Swap)
std::swap(Op0, Op1);
@@ -24074,13 +24070,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Cmp1 = DAG.getNode(
Opc, dl, {VT, MVT::Other},
{Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)});
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1),
- Cmp1.getValue(1));
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Cmp0.getValue(1), Cmp1.getValue(1));
} else {
- Cmp0 = DAG.getNode(
- Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8));
- Cmp1 = DAG.getNode(
- Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8));
+ Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getTargetConstant(CC0, dl, MVT::i8));
+ Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getTargetConstant(CC1, dl, MVT::i8));
}
Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
@@ -24090,8 +24086,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
{Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
Chain = Cmp.getValue(1);
} else
- Cmp = DAG.getNode(
- Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
+ Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
} else {
// Handle all other FP comparisons here.
@@ -24103,8 +24099,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
{Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
Chain = Cmp.getValue(1);
} else
- Cmp = DAG.getNode(
- Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
+ Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
if (VT.getFixedSizeInBits() >
@@ -24155,7 +24151,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
switch (Cond) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETULT:
case ISD::SETLT: CmpMode = 0x00; break;
@@ -24167,7 +24163,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
case ISD::SETGE: CmpMode = 0x03; break;
case ISD::SETEQ: CmpMode = 0x04; break;
case ISD::SETNE: CmpMode = 0x05; break;
- // clang-format on
+ // clang-format on
}
// Are we comparing unsigned or signed integers?
@@ -24265,13 +24261,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
bool Invert = false;
unsigned Opc;
switch (Cond) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected condition code");
case ISD::SETUGT: Invert = true; [[fallthrough]];
case ISD::SETULE: Opc = ISD::UMIN; break;
case ISD::SETULT: Invert = true; [[fallthrough]];
case ISD::SETUGE: Opc = ISD::UMAX; break;
- // clang-format on
+ // clang-format on
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
@@ -24295,10 +24291,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// operations may be required for some comparisons.
unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
: X86ISD::PCMPGT;
- bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
- Cond == ISD::SETGE || Cond == ISD::SETUGE;
- bool Invert = Cond == ISD::SETNE ||
- (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
+ bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || Cond == ISD::SETGE ||
+ Cond == ISD::SETUGE;
+ bool Invert =
+ Cond == ISD::SETNE || (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
if (Swap)
std::swap(Op0, Op1);
@@ -24316,7 +24312,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
- static const int MaskHi[] = { 1, 1, 3, 3 };
+ static const int MaskHi[] = {1, 1, 3, 3};
SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
return DAG.getBitcast(VT, Result);
@@ -24327,7 +24323,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Op1 = DAG.getAllOnesConstant(dl, MVT::v4i32);
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
- static const int MaskHi[] = { 1, 1, 3, 3 };
+ static const int MaskHi[] = {1, 1, 3, 3};
SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
return DAG.getBitcast(VT, Result);
@@ -24366,8 +24362,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
// Create masks for only the low parts/high parts of the 64 bit integers.
- static const int MaskHi[] = { 1, 1, 3, 3 };
- static const int MaskLo[] = { 0, 0, 2, 2 };
+ static const int MaskHi[] = {1, 1, 3, 3};
+ static const int MaskLo[] = {0, 0, 2, 2};
SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
@@ -24394,7 +24390,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
// Make sure the lower and upper halves are both all-ones.
- static const int Mask[] = { 1, 0, 3, 2 };
+ static const int Mask[] = {1, 0, 3, 2};
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
@@ -24409,8 +24405,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
- SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
- VT);
+ SDValue SM =
+ DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
}
@@ -24427,8 +24423,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// Try to select this as a KORTEST+SETCC or KTEST+SETCC if possible.
static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- SDValue &X86CC) {
+ const X86Subtarget &Subtarget, SDValue &X86CC) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
// Must be a bitcast from vXi1.
@@ -24575,7 +24570,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
Op.getOpcode() == ISD::STRICT_FSETCCS;
MVT VT = Op->getSimpleValueType(0);
- if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+ if (VT.isVector())
+ return LowerVSETCC(Op, Subtarget, DAG);
assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
@@ -24670,7 +24666,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
-SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
@@ -24682,8 +24679,8 @@ SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const
// Recreate the carry if needed.
EVT CarryVT = Carry.getValueType();
- Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
- Carry, DAG.getAllOnesConstant(DL, CarryVT));
+ Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry,
+ DAG.getAllOnesConstant(DL, CarryVT));
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
@@ -24703,7 +24700,8 @@ getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
unsigned BaseOp = 0;
SDLoc DL(Op);
switch (Op.getOpcode()) {
- default: llvm_unreachable("Unknown ovf instruction!");
+ default:
+ llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
@@ -24777,7 +24775,8 @@ static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
SDValue VOp0 = V.getOperand(0);
unsigned InBits = VOp0.getValueSizeInBits();
unsigned Bits = V.getValueSizeInBits();
- return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
+ return DAG.MaskedValueIsZero(VOp0,
+ APInt::getHighBitsSet(InBits, InBits - Bits));
}
// Lower various (select (icmp CmpVal, 0), LHS, RHS) custom patterns.
@@ -24915,7 +24914,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool AddTest = true;
- SDValue Cond = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
@@ -25066,14 +25065,13 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
- if (CondOpcode == X86ISD::SETCC ||
- CondOpcode == X86ISD::SETCC_CARRY) {
+ if (CondOpcode == X86ISD::SETCC || CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
bool IllegalFPCMov = false;
- if (VT.isFloatingPoint() && !VT.isVector() &&
- !isScalarFPTypeInSSEReg(VT) && Subtarget.canUseCMOV()) // FPStack?
+ if (VT.isFloatingPoint() && !VT.isVector() && !isScalarFPTypeInSSEReg(VT) &&
+ Subtarget.canUseCMOV()) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
@@ -25136,14 +25134,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// X86 doesn't have an i8 cmov. If both operands are the result of a truncate
// widen the cmov and push the truncate through. This avoids introducing a new
// branch during isel and doesn't add any extensions.
- if (Op.getValueType() == MVT::i8 &&
- Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
+ if (Op.getValueType() == MVT::i8 && Op1.getOpcode() == ISD::TRUNCATE &&
+ Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
// Exclude CopyFromReg to avoid partial register stalls.
- T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
- SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1,
- CC, Cond);
+ T1.getOpcode() != ISD::CopyFromReg &&
+ T2.getOpcode() != ISD::CopyFromReg) {
+ SDValue Cmov =
+ DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, CC, Cond);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
}
@@ -25159,14 +25158,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
!X86::mayFoldLoad(Op2, Subtarget))) {
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
- SDValue Ops[] = { Op2, Op1, CC, Cond };
+ SDValue Ops[] = {Op2, Op1, CC, Cond};
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
- SDValue Ops[] = { Op2, Op1, CC, Cond };
+ SDValue Ops[] = {Op2, Op1, CC, Cond};
return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops, Op->getFlags());
}
@@ -25276,9 +25275,9 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
InVT = In.getSimpleValueType();
}
- // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results,
- // so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still
- // need to be handled here for 256/512-bit results.
+ // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit
+ // results, so are legal and shouldn't occur here. AVX2/AVX512 pmovsx*
+ // instructions still need to be handled here for 256/512-bit results.
if (Subtarget.hasInt256()) {
assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
@@ -25287,9 +25286,8 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
// FIXME: Apparently we create inreg operations that could be regular
// extends.
- unsigned ExtOpc =
- Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND;
+ unsigned ExtOpc = Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, VT, In);
}
@@ -25407,9 +25405,9 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);
unsigned NumElems = InVT.getVectorNumElements();
- SmallVector<int,8> ShufMask(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask[i] = i + NumElems/2;
+ SmallVector<int, 8> ShufMask(NumElems, -1);
+ for (unsigned i = 0; i != NumElems / 2; ++i)
+ ShufMask[i] = i + NumElems / 2;
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);
@@ -25573,11 +25571,10 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// TODO: It is possible to support ZExt by zeroing the undef values during
// the shuffle phase or after the shuffle.
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
assert(RegVT.isVector() && "We only custom lower vector loads.");
- assert(RegVT.isInteger() &&
- "We only custom lower integer vector loads.");
+ assert(RegVT.isInteger() && "We only custom lower integer vector loads.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
@@ -25620,8 +25617,8 @@ static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
- SDValue Cond = Op.getOperand(1);
- SDValue Dest = Op.getOperand(2);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
// Bail out when we don't have native compare instructions.
@@ -25671,7 +25668,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
- DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
@@ -25742,9 +25739,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
-SDValue
-X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
bool EmitStackProbeCall = hasStackProbeSymbol(MF);
@@ -25755,7 +25751,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
+ SDValue Size = Op.getOperand(1);
MaybeAlign Alignment(Op.getConstantOperandVal(2));
EVT VT = Node->getValueType(0);
@@ -25877,8 +25873,9 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
- Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
+ FIN = DAG.getNode(
+ ISD::ADD, DL, PtrVT, FIN,
+ DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
Store = DAG.getStore(
Op.getOperand(0), DL, RSFIN, FIN,
@@ -25888,8 +25885,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget.is64Bit() &&
- "LowerVAARG only handles 64-bit va_arg!");
+ assert(Subtarget.is64Bit() && "LowerVAARG only handles 64-bit va_arg!");
assert(Op.getNumOperands() == 4);
MachineFunction &MF = DAG.getMachineFunction();
@@ -25913,11 +25909,11 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// selection mechanism works only for the basic types.
assert(ArgVT != MVT::f80 && "va_arg for f80 not yet implemented");
if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
- ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
} else {
assert(ArgVT.isInteger() && ArgSize <= 32 /*bytes*/ &&
"Unhandled argument type in LowerVAARG");
- ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
}
if (ArgMode == 2) {
@@ -25951,7 +25947,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
// where a va_list is still an i8*.
assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
if (Subtarget.isCallingConvWin64(
- DAG.getMachineFunction().getFunction().getCallingConv()))
+ DAG.getMachineFunction().getFunction().getCallingConv()))
// Probably a Win64 va_copy.
return DAG.expandVACopy(Op.getNode());
@@ -26013,15 +26009,17 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
return DAG.getConstant(0, dl, VT);
}
- assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
- && "Unknown target vector shift-by-constant node");
+ assert(
+ (Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) &&
+ "Unknown target vector shift-by-constant node");
// Fold this packed vector shift into a build vector if SrcOp is a
// vector of Constants or UNDEFs.
if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
unsigned ShiftOpc;
switch (Opc) {
- default: llvm_unreachable("Unknown opcode!");
+ default:
+ llvm_unreachable("Unknown opcode!");
case X86ISD::VSHLI:
ShiftOpc = ISD::SHL;
break;
@@ -26161,8 +26159,8 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
Hi = DAG.getBitcast(MVT::v32i1, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
} else {
- MVT BitcastVT = MVT::getVectorVT(MVT::i1,
- Mask.getSimpleValueType().getSizeInBits());
+ MVT BitcastVT =
+ MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
@@ -26233,9 +26231,12 @@ static int getSEHRegistrationNodeSize(const Function *Fn) {
// The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
// WinEHStatePass for the full struct definition.
switch (classifyEHPersonality(Fn->getPersonalityFn())) {
- case EHPersonality::MSVC_X86SEH: return 24;
- case EHPersonality::MSVC_CXX: return 16;
- default: break;
+ case EHPersonality::MSVC_X86SEH:
+ return 24;
+ case EHPersonality::MSVC_CXX:
+ return 16;
+ default:
+ break;
}
report_fatal_error(
"can only recover FP for 32-bit MSVC EH personality functions");
@@ -26327,13 +26328,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
unsigned IntNo = Op.getConstantOperandVal(0);
MVT VT = Op.getSimpleValueType();
- const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
+ const IntrinsicData *IntrData = getIntrinsicWithoutChain(IntNo);
// Propagate flags from original node to transformed node(s).
SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags());
if (IntrData) {
- switch(IntrData->Type) {
+ switch (IntrData->Type) {
case INTR_TYPE_1OP: {
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
@@ -26459,9 +26460,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
- return getVectorMaskingNode(
- DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru,
- Subtarget, DAG);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
+ Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK_SAE: {
SDValue Src = Op.getOperand(1);
@@ -26502,9 +26502,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isRoundModeCurDirection(Rnd))
return SDValue();
}
- return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
- Src2),
- Mask, passThru, Subtarget, DAG);
+ return getScalarMaskingNode(
+ DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), Mask, passThru,
+ Subtarget, DAG);
}
assert(Op.getNumOperands() == (6U + HasRounding) &&
@@ -26518,9 +26518,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
- return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1,
- Src2, RoundingMode),
- Mask, passThru, Subtarget, DAG);
+ return getScalarMaskingNode(
+ DAG.getNode(Opc, dl, VT, Src1, Src2, RoundingMode), Mask, passThru,
+ Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RND: {
SDValue Src1 = Op.getOperand(1);
@@ -26555,8 +26555,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
return SDValue();
- return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
- Mask, passThru, Subtarget, DAG);
+ return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask,
+ passThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK: {
SDValue Src1 = Op.getOperand(1);
@@ -26592,8 +26592,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return SDValue();
}
- return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
- Mask, PassThru, Subtarget, DAG);
+ return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), Mask,
+ PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_SCALAR_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
@@ -26642,12 +26642,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Reverse the operands to match VSELECT order.
return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1);
}
- case VPERM_2OP : {
+ case VPERM_2OP: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
// Swap Src1 and Src2 in the node creation
- return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
+ return DAG.getNode(IntrData->Opc0, dl, VT, Src2, Src1);
}
case CFMA_OP_MASKZ:
case CFMA_OP_MASK: {
@@ -26691,8 +26691,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Imm = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm);
- SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(),
- Subtarget, DAG);
+ SDValue FPclassMask =
+ getScalarMaskingNode(FPclass, Mask, SDValue(), Subtarget, DAG);
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
@@ -26716,7 +26716,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isRoundModeCurDirection(Sae))
return SDValue();
}
- //default rounding mode
+ // default rounding mode
return DAG.getNode(IntrData->Opc0, dl, MaskVT,
{Op.getOperand(1), Op.getOperand(2), CC, Mask});
}
@@ -26734,12 +26734,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else if (!isRoundModeCurDirection(Sae))
return SDValue();
}
- //default rounding mode
+ // default rounding mode
if (!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);
- SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(),
- Subtarget, DAG);
+ SDValue CmpMask =
+ getScalarMaskingNode(Cmp, Mask, SDValue(), Subtarget, DAG);
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
@@ -26907,8 +26907,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode");
uint64_t Imm = Op.getConstantOperandVal(2);
- SDValue Control = DAG.getTargetConstant(Imm & 0xffff, dl,
- Op.getValueType());
+ SDValue Control =
+ DAG.getTargetConstant(Imm & 0xffff, dl, Op.getValueType());
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Control);
}
@@ -26930,7 +26930,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(3), GenCF.getValue(1));
}
SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
- SDValue Results[] = { SetCC, Res };
+ SDValue Results[] = {SetCC, Res};
return DAG.getMergeValues(Results, dl);
}
case CVTPD2PS_MASK:
@@ -27013,7 +27013,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
switch (IntNo) {
- default: return SDValue(); // Don't custom lower most intrinsics.
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
@@ -27047,7 +27048,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned TestOpc = X86ISD::PTEST;
X86::CondCode X86CC;
switch (IntNo) {
- default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ default:
+ llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx512_ktestc_b:
case Intrinsic::x86_avx512_ktestc_w:
case Intrinsic::x86_avx512_ktestc_d:
@@ -27118,7 +27120,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned Opcode;
X86::CondCode X86CC;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ default:
+ llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse42_pcmpistria128:
Opcode = X86ISD::PCMPISTR;
X86CC = X86::COND_A;
@@ -27288,7 +27291,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned NewIntrinsic;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ default:
+ llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_mmx_pslli_w:
NewIntrinsic = Intrinsic::x86_mmx_psll_w;
break;
@@ -27365,16 +27369,16 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
- SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
SDValue Res =
DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops,
MemIntr->getMemoryVT(), MemIntr->getMemOperand());
return DAG.getMergeValues({Res, Res.getValue(1)}, dl);
}
-static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
- SDValue Src, SDValue Mask, SDValue Base,
- SDValue Index, SDValue ScaleOp, SDValue Chain,
+static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, SDValue Src,
+ SDValue Mask, SDValue Base, SDValue Index,
+ SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
@@ -27403,7 +27407,7 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
- SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
SDValue Res =
DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops,
MemIntr->getMemoryVT(), MemIntr->getMemOperand());
@@ -27411,9 +27415,9 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
- SDValue Src, SDValue Mask, SDValue Base,
- SDValue Index, SDValue ScaleOp, SDValue Chain,
- const X86Subtarget &Subtarget) {
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
@@ -27455,8 +27459,8 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(
+ MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
@@ -27472,11 +27476,11 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
/// expanded intrinsics implicitly defines extra registers (i.e. not just
/// EDX:EAX).
static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
- SelectionDAG &DAG,
- unsigned TargetOpcode,
- unsigned SrcReg,
- const X86Subtarget &Subtarget,
- SmallVectorImpl<SDValue> &Results) {
+ SelectionDAG &DAG,
+ unsigned TargetOpcode,
+ unsigned SrcReg,
+ const X86Subtarget &Subtarget,
+ SmallVectorImpl<SDValue> &Results) {
SDValue Chain = N->getOperand(0);
SDValue Glue;
@@ -27516,7 +27520,7 @@ static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
- SDValue Ops[] = { LO, HI };
+ SDValue Ops[] = {LO, HI};
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
@@ -27533,9 +27537,9 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
// The processor's time-stamp counter (a 64-bit MSR) is stored into the
// EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
// and the EAX register is loaded with the low-order 32 bits.
- SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
- /* NoRegister */0, Subtarget,
- Results);
+ SDValue Glue =
+ expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
+ /* NoRegister */ 0, Subtarget, Results);
if (Opcode != X86::RDTSCP)
return;
@@ -27593,24 +27597,24 @@ static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
}
/// Emit Truncating Store with signed or unsigned saturation.
-static SDValue
-EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL, SDValue Val,
- SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
- SelectionDAG &DAG) {
+static SDValue EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL,
+ SDValue Val, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
- SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ SDValue Ops[] = {Chain, Val, Ptr, Undef};
unsigned Opc = SignedSat ? X86ISD::VTRUNCSTORES : X86ISD::VTRUNCSTOREUS;
return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
}
/// Emit Masked Truncating Store with signed or unsigned saturation.
static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain,
- const SDLoc &DL,
- SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
- MachineMemOperand *MMO, SelectionDAG &DAG) {
+ const SDLoc &DL, SDValue Val, SDValue Ptr,
+ SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO,
+ SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Val, Ptr, Mask };
+ SDValue Ops[] = {Chain, Val, Ptr, Mask};
unsigned Opc = SignedSat ? X86ISD::VMTRUNCSTORES : X86ISD::VMTRUNCSTOREUS;
return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
}
@@ -27678,9 +27682,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
// Create a WRPKRU node, pass the input to the EAX parameter, and pass 0
// to the EDX and ECX parameters.
- return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other,
- Op.getOperand(0), Op.getOperand(2),
- DAG.getConstant(0, dl, MVT::i32),
+ return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(2), DAG.getConstant(0, dl, MVT::i32),
DAG.getConstant(0, dl, MVT::i32));
}
case llvm::Intrinsic::asan_check_memaccess: {
@@ -27711,7 +27714,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic");
+ default:
+ llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_umwait:
Opcode = X86ISD::UMWAIT;
break;
@@ -27724,9 +27728,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
break;
}
- SDValue Operation =
- DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
- Op->getOperand(3), Op->getOperand(4));
+ SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
+ Op->getOperand(3), Op->getOperand(4));
SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
@@ -27738,7 +27741,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
unsigned Opcode;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic!");
+ default:
+ llvm_unreachable("Impossible intrinsic!");
case Intrinsic::x86_enqcmd:
Opcode = X86ISD::ENQCMD;
break;
@@ -27762,7 +27766,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic");
+ default:
+ llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_aesenc128kl:
Opcode = X86ISD::AESENC128KL;
break;
@@ -27800,7 +27805,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode;
switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic");
+ default:
+ llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_aesencwide128kl:
Opcode = X86ISD::AESENCWIDE128KL;
break;
@@ -27955,9 +27961,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDValue Src2 = Op.getOperand(4);
SDValue CC = Op.getOperand(5);
MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
- SDValue Operation = DAG.getMemIntrinsicNode(
- X86ISD::CMPCCXADD, DL, Op->getVTList(), {Chain, Addr, Src1, Src2, CC},
- MVT::i32, MMO);
+ SDValue Operation =
+ DAG.getMemIntrinsicNode(X86ISD::CMPCCXADD, DL, Op->getVTList(),
+ {Chain, Addr, Src1, Src2, CC}, MVT::i32, MMO);
return Operation;
}
case Intrinsic::x86_aadd32:
@@ -28041,8 +28047,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
SDLoc dl(Op);
- switch(IntrData->Type) {
- default: llvm_unreachable("Unknown Intrinsic Type");
+ switch (IntrData->Type) {
+ default:
+ llvm_unreachable("Unknown Intrinsic Type");
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
@@ -28063,32 +28070,32 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
case GATHER_AVX2: {
SDValue Chain = Op.getOperand(0);
- SDValue Src = Op.getOperand(2);
- SDValue Base = Op.getOperand(3);
+ SDValue Src = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
- SDValue Mask = Op.getOperand(5);
+ SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
}
case GATHER: {
- //gather(v1, mask, index, base, scale);
+ // gather(v1, mask, index, base, scale);
SDValue Chain = Op.getOperand(0);
- SDValue Src = Op.getOperand(2);
- SDValue Base = Op.getOperand(3);
+ SDValue Src = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
- SDValue Mask = Op.getOperand(5);
+ SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
- Chain, Subtarget);
+ return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale, Chain,
+ Subtarget);
}
case SCATTER: {
- //scatter(base, mask, index, v1, scale);
+ // scatter(base, mask, index, v1, scale);
SDValue Chain = Op.getOperand(0);
- SDValue Base = Op.getOperand(2);
- SDValue Mask = Op.getOperand(3);
+ SDValue Base = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
- SDValue Src = Op.getOperand(5);
+ SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
@@ -28099,9 +28106,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
"Wrong prefetch hint in intrinsic: should be 2 or 3");
unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
- SDValue Mask = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
- SDValue Base = Op.getOperand(4);
+ SDValue Base = Op.getOperand(4);
SDValue Scale = Op.getOperand(5);
return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain,
Subtarget);
@@ -28136,8 +28143,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
- Ret, SDValue(InTrans.getNode(), 1));
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Ret,
+ SDValue(InTrans.getNode(), 1));
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
@@ -28150,7 +28157,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
- EVT MemVT = MemIntr->getMemoryVT();
+ EVT MemVT = MemIntr->getMemoryVT();
uint16_t TruncationOp = IntrData->Opc0;
switch (TruncationOp) {
@@ -28248,7 +28255,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
Register FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
- SDLoc dl(Op); // FIXME probably not meaningful
+ SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = Op.getConstantOperandVal(0);
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
@@ -28262,7 +28269,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT,
+Register X86TargetLowering::getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const {
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
@@ -28322,10 +28329,10 @@ bool X86TargetLowering::needsFixedCatchObjects() const {
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- SDValue Offset = Op.getOperand(1);
- SDValue Handler = Op.getOperand(2);
- SDLoc dl (Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -28336,9 +28343,9 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
- SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
- DAG.getIntPtrConstant(RegInfo->getSlotSize(),
- dl));
+ SDValue StoreAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
+ DAG.getIntPtrConstant(RegInfo->getSlotSize(), dl));
StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
@@ -28361,19 +28368,20 @@ SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
(void)TII->getGlobalBaseReg(&DAG.getMachineFunction());
}
return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
- DAG.getVTList(MVT::i32, MVT::Other),
- Op.getOperand(0), Op.getOperand(1));
+ DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
+ Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
- Op.getOperand(0), Op.getOperand(1));
+ return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1));
}
-SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue
+X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
Op.getOperand(0));
@@ -28389,7 +28397,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
- SDLoc dl (Op);
+ SDLoc dl(Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -28398,7 +28406,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue OutChains[6];
// Large code-model.
- const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
+ const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
@@ -28446,7 +28454,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
} else {
const Function *Func =
- cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
@@ -28468,7 +28476,8 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
unsigned Idx = 0;
for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I, ++Idx)
+ E = FTy->param_end();
+ I != E; ++I, ++Idx)
if (Attrs.hasParamAttr(Idx, Attribute::InReg)) {
const DataLayout &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
@@ -28574,18 +28583,16 @@ SDValue X86TargetLowering::LowerGET_ROUNDING(SDValue Op,
Chain = CWD.getValue(1);
// Mask and turn the control bits into a shift for the lookup table.
- SDValue Shift =
- DAG.getNode(ISD::SRL, DL, MVT::i16,
- DAG.getNode(ISD::AND, DL, MVT::i16,
- CWD, DAG.getConstant(0xc00, DL, MVT::i16)),
- DAG.getConstant(9, DL, MVT::i8));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16, CWD,
+ DAG.getConstant(0xc00, DL, MVT::i16)),
+ DAG.getConstant(9, DL, MVT::i8));
Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift);
SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
- SDValue RetVal =
- DAG.getNode(ISD::AND, DL, MVT::i32,
- DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
- DAG.getConstant(3, DL, MVT::i32));
+ SDValue RetVal = DAG.getNode(ISD::AND, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
+ DAG.getConstant(3, DL, MVT::i32));
RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);
@@ -28625,14 +28632,14 @@ SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
uint64_t RM = CVal->getZExtValue();
int FieldVal;
switch (static_cast<RoundingMode>(RM)) {
- // clang-format off
+ // clang-format off
case RoundingMode::NearestTiesToEven: FieldVal = X86::rmToNearest; break;
case RoundingMode::TowardNegative: FieldVal = X86::rmDownward; break;
case RoundingMode::TowardPositive: FieldVal = X86::rmUpward; break;
case RoundingMode::TowardZero: FieldVal = X86::rmTowardZero; break;
default:
llvm_unreachable("rounding mode is not supported by X86 hardware");
- // clang-format on
+ // clang-format on
}
RMBits = DAG.getConstant(FieldVal, DL, MVT::i16);
} else {
@@ -28873,17 +28880,15 @@ static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
- "Unsupported element type");
+ assert((EltVT == MVT::i8 || EltVT == MVT::i16) && "Unsupported element type");
// Split vector, it's Lo and Hi parts will be handled in next iteration.
- if (NumElems > 16 ||
- (NumElems == 16 && !Subtarget.canExtendTo512DQ()))
+ if (NumElems > 16 || (NumElems == 16 && !Subtarget.canExtendTo512DQ()))
return splitVectorIntUnary(Op, DAG, dl);
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
- "Unsupported value type for operation");
+ "Unsupported value type for operation");
// Use native supported vector instruction vplzcntd.
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
@@ -28998,7 +29003,35 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
}
+static SDValue LowerVectorCTLZ_GFNI(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+ SDValue Input = Op.getOperand(0);
+ if (!VT.isVector() || VT.getVectorElementType() != MVT::i8)
+ return SDValue();
+  // Build the byte matrix operand for GF2P8AFFINEQB: byte i of every 64-bit
+  // lane holds the single bit (1 << (7 - (i % 8))).
+  SmallVector<SDValue, 16> MatrixVals;
+  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+    uint8_t mask = 1 << (7 - (i % 8));
+    MatrixVals.push_back(DAG.getConstant(mask, dl, MVT::i8));
+  }
+
+  // First affine transform of the input with the matrix above (immediate 0).
+  SDValue Matrix = DAG.getBuildVector(VT, dl, MatrixVals);
+  SDValue Reversed = DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, Input, Matrix,
+                                 DAG.getTargetConstant(0, dl, MVT::i8));
+
+  // Isolate the lowest set bit of each byte: x & ~(x - 1), with the subtract
+  // and NOT expressed as an ADD and XOR with 0xFF in i8 arithmetic.
+  SDValue AddMask = DAG.getConstant(0xFF, dl, MVT::i8);
+  SDValue AddVec = DAG.getSplatBuildVector(VT, dl, AddMask);
+  SDValue Summed = DAG.getNode(ISD::ADD, dl, VT, Reversed, AddVec);
+  SDValue NotSummed = DAG.getNode(ISD::XOR, dl, VT, Summed, AddVec);
+  SDValue Filtered = DAG.getNode(ISD::AND, dl, VT, NotSummed, Reversed);
+
+  // Second affine transform with immediate 8, reusing the same matrix; a zero
+  // byte of Filtered therefore yields 8.
+  return DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, Filtered, Matrix,
+                     DAG.getTargetConstant(8, dl, MVT::i8));
+}
static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
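
(For readers unfamiliar with GF2P8AFFINEQB, the new lowering above leans on two building blocks: the per-byte affine transform, an 8x8 bit-matrix multiply over GF(2) followed by an XOR with the immediate, and the classic lowest-set-bit isolation. A minimal scalar model of both is given below for reference only; the helper names and the use of GCC/Clang's __builtin_parity are illustrative, not part of the patch.)

  #include <cstdint>

  // Scalar semantics of GF2P8AFFINEQB for one byte: result bit i is the
  // immediate's bit i XORed with the parity of (matrix byte (7 - i) AND src).
  static uint8_t gf2p8affineByte(uint8_t Src, uint64_t Matrix, uint8_t Imm) {
    uint8_t Result = 0;
    for (int i = 0; i < 8; ++i) {
      uint8_t Row = (Matrix >> (8 * (7 - i))) & 0xFF;
      uint8_t Bit = ((Imm >> i) & 1) ^ __builtin_parity(Row & Src);
      Result |= Bit << i;
    }
    return Result;
  }

  // Lowest-set-bit isolation as used between the two affine transforms:
  // x & ~(x - 1); in the DAG this is the ADD 0xFF / XOR 0xFF / AND sequence.
  static uint8_t isolateLowestSetBit(uint8_t X) {
    return X & ~(X - 1);
  }
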
@@ -29007,6 +29040,10 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
unsigned Opc = Op.getOpcode();
+
+ if (VT.isVector() && VT.getScalarType() == MVT::i8 && Subtarget.hasGFNI())
+ return LowerVectorCTLZ_GFNI(Op, DAG, Subtarget);
+
if (VT.isVector())
return LowerVectorCTLZ(Op, dl, Subtarget, DAG);
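
(For reference, the per-byte result both CTLZ paths must agree on is the plain leading-zero count, with a zero byte giving 8; the ZERO_UNDEF form may return anything for zero, so 8 is acceptable there as well. A scalar reference helper, illustrative only:)

  #include <cstdint>

  // Reference per-byte leading-zero count: 0x80 -> 0, 0x01 -> 7, 0x00 -> 8.
  static unsigned lzcnt8(uint8_t X) {
    unsigned Count = 0;
    for (uint8_t Bit = 0x80; Bit != 0 && !(X & Bit); Bit >>= 1)
      ++Count;
    return Count;
  }
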
@@ -29525,10 +29562,10 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SmallVector<SDValue, 16> LoOps, HiOps;
for (unsigned i = 0; i != NumElts; i += 16) {
for (unsigned j = 0; j != 8; ++j) {
- LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
- MVT::i16));
- HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
- MVT::i16));
+ LoOps.push_back(
+ DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl, MVT::i16));
+ HiOps.push_back(
+ DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl, MVT::i16));
}
}
@@ -29569,7 +29606,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
- static const int ShufMask[] = { 0, 4, 2, 6 };
+ static const int ShufMask[] = {0, 4, 2, 6};
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
@@ -29734,7 +29771,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
//
// Place the odd value at an even position (basically, shift all values 1
// step to the left):
- const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
9, -1, 11, -1, 13, -1, 15, -1};
// <a|b|c|d> => <b|undef|d|undef>
SDValue Odd0 =
@@ -29784,7 +29821,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
- (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
+ (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
"Unsupported vector type");
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
@@ -29939,7 +29976,8 @@ static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getMergeValues({Low, Ovf}, dl);
}
-SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Subtarget.isTargetWin64() && "Unexpected target");
EVT VT = Op.getValueType();
assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
@@ -29954,13 +29992,13 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
RTLIB::Libcall LC;
bool isSigned;
switch (Op->getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected request for libcall!");
case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break;
case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break;
case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break;
case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break;
- // clang-format on
+ // clang-format on
}
SDLoc dl(Op);
@@ -30104,9 +30142,9 @@ static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget,
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
-static
-bool supportedVectorShiftWithBaseAmnt(EVT VT, const X86Subtarget &Subtarget,
- unsigned Opcode) {
+static bool supportedVectorShiftWithBaseAmnt(EVT VT,
+ const X86Subtarget &Subtarget,
+ unsigned Opcode) {
return supportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
@@ -30135,7 +30173,7 @@ static bool supportedVectorVarShift(EVT VT, const X86Subtarget &Subtarget,
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
- bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
+ bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
return (Opcode == ISD::SRA) ? AShift : LShift;
}
@@ -32089,7 +32127,8 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
// ReleaseAcquire orderings.
- auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
+ auto Order =
+ AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
// Before the load we need a fence. Here is an example lifted from
// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
@@ -32158,31 +32197,28 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
if (Subtarget.is64Bit()) {
SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
- SDValue Ops[] = {
- DAG.getRegister(X86::RSP, MVT::i64), // Base
- DAG.getTargetConstant(1, DL, MVT::i8), // Scale
- DAG.getRegister(0, MVT::i64), // Index
- DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i16), // Segment.
- Zero,
- Chain};
- SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
- MVT::Other, Ops);
+ SDValue Ops[] = {DAG.getRegister(X86::RSP, MVT::i64), // Base
+ DAG.getTargetConstant(1, DL, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i64), // Index
+ DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i16), // Segment.
+ Zero,
+ Chain};
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, MVT::Other, Ops);
return SDValue(Res, 1);
}
SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
- SDValue Ops[] = {
- DAG.getRegister(X86::ESP, MVT::i32), // Base
- DAG.getTargetConstant(1, DL, MVT::i8), // Scale
- DAG.getRegister(0, MVT::i32), // Index
- DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i16), // Segment.
- Zero,
- Chain
- };
- SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
- MVT::Other, Ops);
+ SDValue Ops[] = {DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, DL, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i16), // Segment.
+ Zero,
+ Chain};
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, MVT::Other, Ops);
return SDValue(Res, 1);
}
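
Both branches above emit a lock or dword ptr [rsp/esp + SPOffset], 0: an idempotent locked read-modify-write that acts as a full barrier and is used here instead of MFENCE. A rough user-level equivalent, assuming the SysV red zone makes an offset below RSP safe to touch (that assumption mirrors the SPOffset logic, which is not shown in this hunk):

#include <atomic>

// Sketch: a full fence implemented as an idempotent locked OR to the stack,
// the same trick emitLockedStackOp uses instead of MFENCE.
inline void locked_stack_fence() {
#if defined(__x86_64__) && defined(__GNUC__)
  // OR with zero never changes the data; the only architectural effect is
  // the LOCK-prefixed ordering, which drains the store buffer.
  asm volatile("lock orl $0, -64(%%rsp)" ::: "memory", "cc");
#else
  std::atomic_thread_fence(std::memory_order_seq_cst); // portable fallback
#endif
}

int main() {
  locked_stack_fence();
  return 0;
}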
@@ -32215,36 +32251,44 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
SDLoc DL(Op);
unsigned Reg = 0;
unsigned size = 0;
- switch(T.SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: Reg = X86::AL; size = 1; break;
- case MVT::i16: Reg = X86::AX; size = 2; break;
- case MVT::i32: Reg = X86::EAX; size = 4; break;
+ switch (T.SimpleTy) {
+ default:
+ llvm_unreachable("Invalid value type!");
+ case MVT::i8:
+ Reg = X86::AL;
+ size = 1;
+ break;
+ case MVT::i16:
+ Reg = X86::AX;
+ size = 2;
+ break;
+ case MVT::i32:
+ Reg = X86::EAX;
+ size = 4;
+ break;
case MVT::i64:
assert(Subtarget.is64Bit() && "Node not type legal!");
- Reg = X86::RAX; size = 8;
+ Reg = X86::RAX;
+ size = 8;
break;
}
- SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
- Op.getOperand(2), SDValue());
- SDValue Ops[] = { cpIn.getValue(0),
- Op.getOperand(1),
- Op.getOperand(3),
- DAG.getTargetConstant(size, DL, MVT::i8),
- cpIn.getValue(1) };
+ SDValue cpIn =
+ DAG.getCopyToReg(Op.getOperand(0), DL, Reg, Op.getOperand(2), SDValue());
+ SDValue Ops[] = {cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3),
+ DAG.getTargetConstant(size, DL, MVT::i8), cpIn.getValue(1)};
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
- SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
- Ops, T, MMO);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, Ops, T, MMO);
SDValue cpOut =
- DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
MVT::i32, cpOut.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
- return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
- cpOut, Success, EFLAGS.getValue(1));
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), cpOut, Success,
+ EFLAGS.getValue(1));
}
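
The lowering above pins the expected value in AL/AX/EAX/RAX according to the operand size, issues LCMPXCHG, and converts EFLAGS into the success result via COND_E (ZF). For orientation, the C++-level operation it implements (a sketch, not the DAG code itself):

#include <atomic>
#include <cstdio>

int main() {
  std::atomic<int> V{42};
  int Expected = 42;
  // compare_exchange_strong compiles to LOCK CMPXCHG on x86: EAX holds
  // Expected, ZF reports success, and EAX is reloaded with the old value on
  // failure, which is exactly the register/flag plumbing set up above.
  bool Ok = V.compare_exchange_strong(Expected, 7);
  printf("ok=%d value=%d expected=%d\n", Ok, V.load(), Expected);
  return 0;
}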
// Create MOVMSKB, taking into account whether we need to split for AVX1.
@@ -32306,7 +32350,8 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
}
assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
- SrcVT == MVT::i64) && "Unexpected VT!");
+ SrcVT == MVT::i64) &&
+ "Unexpected VT!");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) &&
@@ -32320,8 +32365,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
// Example: from MVT::v2i32 to MVT::v4i32.
MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
SrcVT.getVectorNumElements() * 2);
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
- DAG.getUNDEF(SrcVT));
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, DAG.getUNDEF(SrcVT));
} else {
assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
@@ -32467,7 +32511,8 @@ static SDValue LowerVectorCTPOP(SDValue Op, const SDLoc &DL,
if (Subtarget.hasVPOPCNTDQ()) {
unsigned NumElems = VT.getVectorNumElements();
assert((VT.getVectorElementType() == MVT::i8 ||
- VT.getVectorElementType() == MVT::i16) && "Unexpected type");
+ VT.getVectorElementType() == MVT::i16) &&
+ "Unexpected type");
if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) {
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0);
@@ -32865,16 +32910,16 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
SDValue NewChain = DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Chain);
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
- DAG.getUNDEF(VT), NewChain);
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT),
+ NewChain);
}
SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
// RAUW the chain, but don't worry about the result, as it's unused.
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
- DAG.getUNDEF(VT), LockOp.getValue(1));
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT),
+ LockOp.getValue(1));
}
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
@@ -32974,17 +33019,17 @@ static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
// Set the carry flag.
SDValue Carry = Op.getOperand(2);
EVT CarryVT = Carry.getValueType();
- Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
- Carry, DAG.getAllOnesConstant(DL, CarryVT));
+ Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry,
+ DAG.getAllOnesConstant(DL, CarryVT));
bool IsAdd = Opc == ISD::UADDO_CARRY || Opc == ISD::SADDO_CARRY;
- SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
- Op.getOperand(0), Op.getOperand(1),
- Carry.getValue(1));
+ SDValue Sum =
+ DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs, Op.getOperand(0),
+ Op.getOperand(1), Carry.getValue(1));
bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY;
- SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
- Sum.getValue(1), DL, DAG);
+ SDValue SetCC =
+ getSETCC(IsSigned ? X86::COND_O : X86::COND_B, Sum.getValue(1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
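
The trick above is that adding all-ones to the incoming carry operand sets CF exactly when the carry is non-zero, so the following ADC/SBB can consume it. A scalar sketch of the unsigned add-with-carry case (function and names are illustrative):

#include <cstdint>
#include <cstdio>

// uaddo_carry(x, y, cin) -> (sum, cout): the operation an ADC implements.
// The lowering above first computes cin + ~0, which overflows exactly when
// cin != 0, moving the boolean carry into CF for the ADC to consume.
void uaddo_carry(uint64_t X, uint64_t Y, bool CarryIn, uint64_t *Sum,
                 uint64_t *CarryOut) {
  uint64_t T = X + Y;
  bool C1 = T < X;          // carry out of X + Y
  uint64_t R = T + CarryIn; // ADC also adds the incoming carry
  bool C2 = R < T;          // carry out of adding the incoming carry
  *Sum = R;
  *CarryOut = C1 | C2;
}

int main() {
  uint64_t S, C;
  uaddo_carry(~0ull, 0, true, &S, &C);
  printf("sum=%llu carry=%llu\n", (unsigned long long)S,
         (unsigned long long)C); // sum=0 carry=1
  return 0;
}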
@@ -33136,8 +33181,8 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// Determine how much we need to widen by to get a 512-bit type.
- unsigned Factor = std::min(512/VT.getSizeInBits(),
- 512/IndexVT.getSizeInBits());
+ unsigned Factor =
+ std::min(512 / VT.getSizeInBits(), 512 / IndexVT.getSizeInBits());
unsigned NumElts = VT.getVectorNumElements() * Factor;
VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
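
The widening factor is simply how many copies of each vector fit in 512 bits, taken from whichever of the data and index vectors is wider. A worked example, assuming a v4i32 scatter with a v4i64 index and no VLX:

#include <algorithm>
#include <cstdio>

int main() {
  unsigned DataBits = 4 * 32;                                  // v4i32 -> 128
  unsigned IndexBits = 4 * 64;                                 // v4i64 -> 256
  unsigned Factor = std::min(512 / DataBits, 512 / IndexBits); // min(4,2) = 2
  unsigned NumElts = 4 * Factor;                               // 8
  printf("widen to v%ui32 data and v%ui64 index\n", NumElts, NumElts);
  return 0;
}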
@@ -33179,7 +33224,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
N->isExpandingLoad());
// Emit a blend.
SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
- return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
+ return DAG.getMergeValues({Select, NewLoad.getValue(1)}, dl);
}
assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
@@ -33191,10 +33236,10 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked load op.");
- assert((ScalarVT.getSizeInBits() >= 32 ||
- (Subtarget.hasBWI() &&
- (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
- "Unsupported masked load op.");
+ assert(
+ (ScalarVT.getSizeInBits() >= 32 ||
+ (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
+ "Unsupported masked load op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bit
@@ -33239,14 +33284,14 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked store op.");
- assert((ScalarVT.getSizeInBits() >= 32 ||
- (Subtarget.hasBWI() &&
- (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
- "Unsupported masked store op.");
+ assert(
+ (ScalarVT.getSizeInBits() >= 32 ||
+ (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
+ "Unsupported masked store op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bit
- unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+ unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
// Mask element has to be i1.
@@ -33288,8 +33333,8 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
!IndexVT.is512BitVector()) {
// Determine how much we need to widen by to get a 512-bit type.
- unsigned Factor = std::min(512/VT.getSizeInBits(),
- 512/IndexVT.getSizeInBits());
+ unsigned Factor =
+ std::min(512 / VT.getSizeInBits(), 512 / IndexVT.getSizeInBits());
unsigned NumElts = VT.getVectorNumElements() * Factor;
@@ -33306,8 +33351,8 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
if (PassThru.isUndef())
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
- SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
- N->getScale() };
+ SDValue Ops[] = {N->getChain(), PassThru, Mask,
+ N->getBasePtr(), Index, N->getScale()};
SDValue NewGather = DAG.getMemIntrinsicNode(
X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(),
N->getMemOperand());
@@ -33505,7 +33550,7 @@ SDValue X86TargetLowering::visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL,
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
@@ -33661,14 +33706,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
- // clang-format on
+ // clang-format on
}
}
/// Replace a node with an illegal result type with a new node built out of
/// custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
+ SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
unsigned Opc = N->getOpcode();
@@ -33794,8 +33839,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Ovf = DAG.getSetCC(dl, N->getValueType(1), Hi, HiCmp, ISD::SETNE);
// Widen the result with by padding with undef.
- Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res,
- DAG.getUNDEF(VT));
+ Res =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res, DAG.getUNDEF(VT));
Results.push_back(Res);
Results.push_back(Ovf);
return;
@@ -33812,11 +33857,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
"Unexpected type action!");
unsigned NumConcat = 128 / InVT.getSizeInBits();
- EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(),
- NumConcat * InVT.getVectorNumElements());
- EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
- VT.getVectorElementType(),
+ EVT InWideVT =
+ EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+ NumConcat * InVT.getVectorNumElements());
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
NumConcat * VT.getVectorNumElements());
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
@@ -33880,7 +33924,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
+ SDValue V = LowerWin64_i128OP(SDValue(N, 0), DAG);
Results.push_back(V);
return;
}
@@ -33958,9 +34002,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo);
Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi);
- SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi,
- { 0, 1, 2, 3, 16, 17, 18, 19,
- -1, -1, -1, -1, -1, -1, -1, -1 });
+ SDValue Res = DAG.getVectorShuffle(
+ MVT::v16i8, dl, Lo, Hi,
+ {0, 1, 2, 3, 16, 17, 18, 19, -1, -1, -1, -1, -1, -1, -1, -1});
Results.push_back(Res);
return;
}
@@ -33992,7 +34036,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
- (InVT == MVT::v4i16 || InVT == MVT::v4i8)){
+ (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector &&
"Unexpected type action!");
assert(Opc == ISD::SIGN_EXTEND && "Unexpected opcode");
@@ -34008,11 +34052,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// Create an unpackl and unpackh to interleave the sign bits then bitcast
// to v2i64.
- SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
- {0, 4, 1, 5});
+ SDValue Lo =
+ DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {0, 4, 1, 5});
Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo);
- SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
- {2, 6, 3, 7});
+ SDValue Hi =
+ DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, {2, 6, 3, 7});
Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
@@ -34199,7 +34243,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
-
if (VT == MVT::v2i32) {
assert((!IsStrict || IsSigned || Subtarget.hasAVX512()) &&
"Strict unsigned conversion requires AVX512");
@@ -34284,9 +34327,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
- SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
- DAG.getConstantFP(0.0, dl, VecInVT), Src,
- ZeroIdx);
+ SDValue Res =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
+ DAG.getConstantFP(0.0, dl, VecInVT), Src, ZeroIdx);
SDValue Chain;
if (IsStrict) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
@@ -34373,8 +34416,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
if (IsStrict) {
- unsigned Opc = IsSigned ? X86ISD::STRICT_CVTSI2P
- : X86ISD::STRICT_CVTUI2P;
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P;
SDValue Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other},
{N->getOperand(0), Src});
Results.push_back(Res);
@@ -34388,7 +34431,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() &&
Subtarget.hasSSE41() && !Subtarget.hasAVX512()) {
SDValue Zero = DAG.getConstant(0, dl, SrcVT);
- SDValue One = DAG.getConstant(1, dl, SrcVT);
+ SDValue One = DAG.getConstant(1, dl, SrcVT);
SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT,
DAG.getNode(ISD::SRL, dl, SrcVT, Src, One),
DAG.getNode(ISD::AND, dl, SrcVT, Src, One));
@@ -34454,9 +34497,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (IsStrict) {
SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
{N->getOperand(0), Or, VBias});
- SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl,
- {MVT::v4f32, MVT::Other},
- {Sub.getValue(1), Sub});
+ SDValue Res =
+ DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
+ {Sub.getValue(1), Sub});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
} else {
@@ -34537,8 +34580,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = N->getConstantOperandVal(1);
switch (IntNo) {
- default : llvm_unreachable("Do not know how to custom type "
- "legalize this intrinsic operation!");
+ default:
+ llvm_unreachable("Do not know how to custom type "
+ "legalize this intrinsic operation!");
case Intrinsic::x86_rdtsc:
return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget,
Results);
@@ -34551,7 +34595,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
case Intrinsic::x86_rdpru:
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget,
- Results);
+ Results);
return;
case Intrinsic::x86_xgetbv:
expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
@@ -34608,12 +34652,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
- Regs64bit ? X86::RAX : X86::EAX,
- HalfT, Result.getValue(1));
+ Regs64bit ? X86::RAX : X86::EAX, HalfT,
+ Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
- Regs64bit ? X86::RDX : X86::EDX,
- HalfT, cpOutL.getValue(2));
- SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Regs64bit ? X86::RDX : X86::EDX, HalfT,
+ cpOutL.getValue(2));
+ SDValue OpsF[] = {cpOutL.getValue(0), cpOutH.getValue(0)};
SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
MVT::i32, cpOutH.getValue(2));
@@ -34655,7 +34699,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// Then extract the lower 64-bits.
MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
SDVTList Tys = DAG.getVTList(LdVT, MVT::Other);
- SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+ SDValue Ops[] = {Node->getChain(), Node->getBasePtr()};
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
MVT::i64, Node->getMemOperand());
if (Subtarget.hasSSE2()) {
@@ -34679,10 +34723,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// First load this into an 80-bit X87 register. This will put the whole
// integer into the significand.
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
- SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
- SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD,
- dl, Tys, Ops, MVT::i64,
- Node->getMemOperand());
+ SDValue Ops[] = {Node->getChain(), Node->getBasePtr()};
+ SDValue Result = DAG.getMemIntrinsicNode(
+ X86ISD::FILD, dl, Tys, Ops, MVT::i64, Node->getMemOperand());
SDValue Chain = Result.getValue(1);
// Now store the X87 register to a stack temporary and convert to i64.
@@ -34693,7 +34736,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
- SDValue StoreOps[] = { Chain, Result, StackPtr };
+ SDValue StoreOps[] = {Chain, Result, StackPtr};
Chain = DAG.getMemIntrinsicNode(
X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
MPI, std::nullopt /*Align*/, MachineMemOperand::MOStore);
@@ -34751,8 +34794,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector &&
"Unexpected type action!");
EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT);
- SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64,
- N->getOperand(0));
+ SDValue Res =
+ DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, N->getOperand(0));
Res = DAG.getBitcast(WideVT, Res);
Results.push_back(Res);
return;
@@ -34774,8 +34817,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT,
- Gather->getPassThru(),
- DAG.getUNDEF(VT));
+ Gather->getPassThru(), DAG.getUNDEF(VT));
if (!Subtarget.hasVLX()) {
// We need to widen the mask, but the instruction will only use 2
// of its elements. So we can use undef.
@@ -34783,8 +34825,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
- SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
- Gather->getBasePtr(), Index, Gather->getScale() };
+ SDValue Ops[] = {Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale()};
SDValue Res = DAG.getMemIntrinsicNode(
X86ISD::MGATHER, dl, DAG.getVTList(WideVT, MVT::Other), Ops,
Gather->getMemoryVT(), Gather->getMemOperand());
@@ -34829,7 +34871,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ADDRSPACECAST: {
- SDValue V = LowerADDRSPACECAST(SDValue(N,0), DAG);
+ SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
Results.push_back(V);
return;
}
@@ -34860,471 +34902,474 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((X86ISD::NodeType)Opcode) {
- case X86ISD::FIRST_NUMBER: break;
-#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
- NODE_NAME_CASE(BSF)
- NODE_NAME_CASE(BSR)
- NODE_NAME_CASE(FSHL)
- NODE_NAME_CASE(FSHR)
- NODE_NAME_CASE(FAND)
- NODE_NAME_CASE(FANDN)
- NODE_NAME_CASE(FOR)
- NODE_NAME_CASE(FXOR)
- NODE_NAME_CASE(FILD)
- NODE_NAME_CASE(FIST)
- NODE_NAME_CASE(FP_TO_INT_IN_MEM)
- NODE_NAME_CASE(FLD)
- NODE_NAME_CASE(FST)
- NODE_NAME_CASE(CALL)
- NODE_NAME_CASE(CALL_RVMARKER)
- NODE_NAME_CASE(IMP_CALL)
- NODE_NAME_CASE(BT)
- NODE_NAME_CASE(CMP)
- NODE_NAME_CASE(FCMP)
- NODE_NAME_CASE(STRICT_FCMP)
- NODE_NAME_CASE(STRICT_FCMPS)
- NODE_NAME_CASE(COMI)
- NODE_NAME_CASE(UCOMI)
- NODE_NAME_CASE(COMX)
- NODE_NAME_CASE(UCOMX)
- NODE_NAME_CASE(CMPM)
- NODE_NAME_CASE(CMPMM)
- NODE_NAME_CASE(STRICT_CMPM)
- NODE_NAME_CASE(CMPMM_SAE)
- NODE_NAME_CASE(SETCC)
- NODE_NAME_CASE(SETCC_CARRY)
- NODE_NAME_CASE(FSETCC)
- NODE_NAME_CASE(FSETCCM)
- NODE_NAME_CASE(FSETCCM_SAE)
- NODE_NAME_CASE(CMOV)
- NODE_NAME_CASE(BRCOND)
- NODE_NAME_CASE(RET_GLUE)
- NODE_NAME_CASE(IRET)
- NODE_NAME_CASE(REP_STOS)
- NODE_NAME_CASE(REP_MOVS)
- NODE_NAME_CASE(GlobalBaseReg)
- NODE_NAME_CASE(Wrapper)
- NODE_NAME_CASE(WrapperRIP)
- NODE_NAME_CASE(MOVQ2DQ)
- NODE_NAME_CASE(MOVDQ2Q)
- NODE_NAME_CASE(MMX_MOVD2W)
- NODE_NAME_CASE(MMX_MOVW2D)
- NODE_NAME_CASE(PEXTRB)
- NODE_NAME_CASE(PEXTRW)
- NODE_NAME_CASE(INSERTPS)
- NODE_NAME_CASE(PINSRB)
- NODE_NAME_CASE(PINSRW)
- NODE_NAME_CASE(PSHUFB)
- NODE_NAME_CASE(ANDNP)
- NODE_NAME_CASE(BLENDI)
- NODE_NAME_CASE(BLENDV)
- NODE_NAME_CASE(HADD)
- NODE_NAME_CASE(HSUB)
- NODE_NAME_CASE(FHADD)
- NODE_NAME_CASE(FHSUB)
- NODE_NAME_CASE(CONFLICT)
- NODE_NAME_CASE(FMAX)
- NODE_NAME_CASE(FMAXS)
- NODE_NAME_CASE(FMAX_SAE)
- NODE_NAME_CASE(FMAXS_SAE)
- NODE_NAME_CASE(STRICT_FMAX)
- NODE_NAME_CASE(FMIN)
- NODE_NAME_CASE(FMINS)
- NODE_NAME_CASE(FMIN_SAE)
- NODE_NAME_CASE(FMINS_SAE)
- NODE_NAME_CASE(STRICT_FMIN)
- NODE_NAME_CASE(FMAXC)
- NODE_NAME_CASE(FMINC)
- NODE_NAME_CASE(FRSQRT)
- NODE_NAME_CASE(FRCP)
- NODE_NAME_CASE(EXTRQI)
- NODE_NAME_CASE(INSERTQI)
- NODE_NAME_CASE(TLSADDR)
- NODE_NAME_CASE(TLSBASEADDR)
- NODE_NAME_CASE(TLSCALL)
- NODE_NAME_CASE(TLSDESC)
- NODE_NAME_CASE(EH_SJLJ_SETJMP)
- NODE_NAME_CASE(EH_SJLJ_LONGJMP)
- NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
- NODE_NAME_CASE(EH_RETURN)
- NODE_NAME_CASE(TC_RETURN)
- NODE_NAME_CASE(FNSTCW16m)
- NODE_NAME_CASE(FLDCW16m)
- NODE_NAME_CASE(FNSTENVm)
- NODE_NAME_CASE(FLDENVm)
- NODE_NAME_CASE(LCMPXCHG_DAG)
- NODE_NAME_CASE(LCMPXCHG8_DAG)
- NODE_NAME_CASE(LCMPXCHG16_DAG)
- NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
- NODE_NAME_CASE(LADD)
- NODE_NAME_CASE(LSUB)
- NODE_NAME_CASE(LOR)
- NODE_NAME_CASE(LXOR)
- NODE_NAME_CASE(LAND)
- NODE_NAME_CASE(LBTS)
- NODE_NAME_CASE(LBTC)
- NODE_NAME_CASE(LBTR)
- NODE_NAME_CASE(LBTS_RM)
- NODE_NAME_CASE(LBTC_RM)
- NODE_NAME_CASE(LBTR_RM)
- NODE_NAME_CASE(AADD)
- NODE_NAME_CASE(AOR)
- NODE_NAME_CASE(AXOR)
- NODE_NAME_CASE(AAND)
- NODE_NAME_CASE(VZEXT_MOVL)
- NODE_NAME_CASE(VZEXT_LOAD)
- NODE_NAME_CASE(VEXTRACT_STORE)
- NODE_NAME_CASE(VTRUNC)
- NODE_NAME_CASE(VTRUNCS)
- NODE_NAME_CASE(VTRUNCUS)
- NODE_NAME_CASE(VMTRUNC)
- NODE_NAME_CASE(VMTRUNCS)
- NODE_NAME_CASE(VMTRUNCUS)
- NODE_NAME_CASE(VTRUNCSTORES)
- NODE_NAME_CASE(VTRUNCSTOREUS)
- NODE_NAME_CASE(VMTRUNCSTORES)
- NODE_NAME_CASE(VMTRUNCSTOREUS)
- NODE_NAME_CASE(VFPEXT)
- NODE_NAME_CASE(STRICT_VFPEXT)
- NODE_NAME_CASE(VFPEXT_SAE)
- NODE_NAME_CASE(VFPEXTS)
- NODE_NAME_CASE(VFPEXTS_SAE)
- NODE_NAME_CASE(VFPROUND)
- NODE_NAME_CASE(VFPROUND2)
- NODE_NAME_CASE(VFPROUND2_RND)
- NODE_NAME_CASE(STRICT_VFPROUND)
- NODE_NAME_CASE(VMFPROUND)
- NODE_NAME_CASE(VFPROUND_RND)
- NODE_NAME_CASE(VFPROUNDS)
- NODE_NAME_CASE(VFPROUNDS_RND)
- NODE_NAME_CASE(VSHLDQ)
- NODE_NAME_CASE(VSRLDQ)
- NODE_NAME_CASE(VSHL)
- NODE_NAME_CASE(VSRL)
- NODE_NAME_CASE(VSRA)
- NODE_NAME_CASE(VSHLI)
- NODE_NAME_CASE(VSRLI)
- NODE_NAME_CASE(VSRAI)
- NODE_NAME_CASE(VSHLV)
- NODE_NAME_CASE(VSRLV)
- NODE_NAME_CASE(VSRAV)
- NODE_NAME_CASE(VROTLI)
- NODE_NAME_CASE(VROTRI)
- NODE_NAME_CASE(VPPERM)
- NODE_NAME_CASE(CMPP)
- NODE_NAME_CASE(STRICT_CMPP)
- NODE_NAME_CASE(PCMPEQ)
- NODE_NAME_CASE(PCMPGT)
- NODE_NAME_CASE(PHMINPOS)
- NODE_NAME_CASE(ADD)
- NODE_NAME_CASE(SUB)
- NODE_NAME_CASE(ADC)
- NODE_NAME_CASE(SBB)
- NODE_NAME_CASE(SMUL)
- NODE_NAME_CASE(UMUL)
- NODE_NAME_CASE(OR)
- NODE_NAME_CASE(XOR)
- NODE_NAME_CASE(AND)
- NODE_NAME_CASE(BEXTR)
- NODE_NAME_CASE(BEXTRI)
- NODE_NAME_CASE(BZHI)
- NODE_NAME_CASE(PDEP)
- NODE_NAME_CASE(PEXT)
- NODE_NAME_CASE(MUL_IMM)
- NODE_NAME_CASE(MOVMSK)
- NODE_NAME_CASE(PTEST)
- NODE_NAME_CASE(TESTP)
- NODE_NAME_CASE(KORTEST)
- NODE_NAME_CASE(KTEST)
- NODE_NAME_CASE(KADD)
- NODE_NAME_CASE(KSHIFTL)
- NODE_NAME_CASE(KSHIFTR)
- NODE_NAME_CASE(PACKSS)
- NODE_NAME_CASE(PACKUS)
- NODE_NAME_CASE(PALIGNR)
- NODE_NAME_CASE(VALIGN)
- NODE_NAME_CASE(VSHLD)
- NODE_NAME_CASE(VSHRD)
- NODE_NAME_CASE(VSHLDV)
- NODE_NAME_CASE(VSHRDV)
- NODE_NAME_CASE(PSHUFD)
- NODE_NAME_CASE(PSHUFHW)
- NODE_NAME_CASE(PSHUFLW)
- NODE_NAME_CASE(SHUFP)
- NODE_NAME_CASE(SHUF128)
- NODE_NAME_CASE(MOVLHPS)
- NODE_NAME_CASE(MOVHLPS)
- NODE_NAME_CASE(MOVDDUP)
- NODE_NAME_CASE(MOVSHDUP)
- NODE_NAME_CASE(MOVSLDUP)
- NODE_NAME_CASE(MOVSD)
- NODE_NAME_CASE(MOVSS)
- NODE_NAME_CASE(MOVSH)
- NODE_NAME_CASE(UNPCKL)
- NODE_NAME_CASE(UNPCKH)
- NODE_NAME_CASE(VBROADCAST)
- NODE_NAME_CASE(VBROADCAST_LOAD)
- NODE_NAME_CASE(VBROADCASTM)
- NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
- NODE_NAME_CASE(VPERMILPV)
- NODE_NAME_CASE(VPERMILPI)
- NODE_NAME_CASE(VPERM2X128)
- NODE_NAME_CASE(VPERMV)
- NODE_NAME_CASE(VPERMV3)
- NODE_NAME_CASE(VPERMI)
- NODE_NAME_CASE(VPTERNLOG)
- NODE_NAME_CASE(FP_TO_SINT_SAT)
- NODE_NAME_CASE(FP_TO_UINT_SAT)
- NODE_NAME_CASE(VFIXUPIMM)
- NODE_NAME_CASE(VFIXUPIMM_SAE)
- NODE_NAME_CASE(VFIXUPIMMS)
- NODE_NAME_CASE(VFIXUPIMMS_SAE)
- NODE_NAME_CASE(VRANGE)
- NODE_NAME_CASE(VRANGE_SAE)
- NODE_NAME_CASE(VRANGES)
- NODE_NAME_CASE(VRANGES_SAE)
- NODE_NAME_CASE(PMULUDQ)
- NODE_NAME_CASE(PMULDQ)
- NODE_NAME_CASE(PSADBW)
- NODE_NAME_CASE(DBPSADBW)
- NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
- NODE_NAME_CASE(VAARG_64)
- NODE_NAME_CASE(VAARG_X32)
- NODE_NAME_CASE(DYN_ALLOCA)
- NODE_NAME_CASE(MFENCE)
- NODE_NAME_CASE(SEG_ALLOCA)
- NODE_NAME_CASE(PROBED_ALLOCA)
- NODE_NAME_CASE(RDRAND)
- NODE_NAME_CASE(RDSEED)
- NODE_NAME_CASE(RDPKRU)
- NODE_NAME_CASE(WRPKRU)
- NODE_NAME_CASE(VPMADDUBSW)
- NODE_NAME_CASE(VPMADDWD)
- NODE_NAME_CASE(VPSHA)
- NODE_NAME_CASE(VPSHL)
- NODE_NAME_CASE(VPCOM)
- NODE_NAME_CASE(VPCOMU)
- NODE_NAME_CASE(VPERMIL2)
- NODE_NAME_CASE(FMSUB)
- NODE_NAME_CASE(STRICT_FMSUB)
- NODE_NAME_CASE(FNMADD)
- NODE_NAME_CASE(STRICT_FNMADD)
- NODE_NAME_CASE(FNMSUB)
- NODE_NAME_CASE(STRICT_FNMSUB)
- NODE_NAME_CASE(FMADDSUB)
- NODE_NAME_CASE(FMSUBADD)
- NODE_NAME_CASE(FMADD_RND)
- NODE_NAME_CASE(FNMADD_RND)
- NODE_NAME_CASE(FMSUB_RND)
- NODE_NAME_CASE(FNMSUB_RND)
- NODE_NAME_CASE(FMADDSUB_RND)
- NODE_NAME_CASE(FMSUBADD_RND)
- NODE_NAME_CASE(VFMADDC)
- NODE_NAME_CASE(VFMADDC_RND)
- NODE_NAME_CASE(VFCMADDC)
- NODE_NAME_CASE(VFCMADDC_RND)
- NODE_NAME_CASE(VFMULC)
- NODE_NAME_CASE(VFMULC_RND)
- NODE_NAME_CASE(VFCMULC)
- NODE_NAME_CASE(VFCMULC_RND)
- NODE_NAME_CASE(VFMULCSH)
- NODE_NAME_CASE(VFMULCSH_RND)
- NODE_NAME_CASE(VFCMULCSH)
- NODE_NAME_CASE(VFCMULCSH_RND)
- NODE_NAME_CASE(VFMADDCSH)
- NODE_NAME_CASE(VFMADDCSH_RND)
- NODE_NAME_CASE(VFCMADDCSH)
- NODE_NAME_CASE(VFCMADDCSH_RND)
- NODE_NAME_CASE(VPMADD52H)
- NODE_NAME_CASE(VPMADD52L)
- NODE_NAME_CASE(VRNDSCALE)
- NODE_NAME_CASE(STRICT_VRNDSCALE)
- NODE_NAME_CASE(VRNDSCALE_SAE)
- NODE_NAME_CASE(VRNDSCALES)
- NODE_NAME_CASE(VRNDSCALES_SAE)
- NODE_NAME_CASE(VREDUCE)
- NODE_NAME_CASE(VREDUCE_SAE)
- NODE_NAME_CASE(VREDUCES)
- NODE_NAME_CASE(VREDUCES_SAE)
- NODE_NAME_CASE(VGETMANT)
- NODE_NAME_CASE(VGETMANT_SAE)
- NODE_NAME_CASE(VGETMANTS)
- NODE_NAME_CASE(VGETMANTS_SAE)
- NODE_NAME_CASE(PCMPESTR)
- NODE_NAME_CASE(PCMPISTR)
- NODE_NAME_CASE(XTEST)
- NODE_NAME_CASE(COMPRESS)
- NODE_NAME_CASE(EXPAND)
- NODE_NAME_CASE(SELECTS)
- NODE_NAME_CASE(ADDSUB)
- NODE_NAME_CASE(RCP14)
- NODE_NAME_CASE(RCP14S)
- NODE_NAME_CASE(RSQRT14)
- NODE_NAME_CASE(RSQRT14S)
- NODE_NAME_CASE(FADD_RND)
- NODE_NAME_CASE(FADDS)
- NODE_NAME_CASE(FADDS_RND)
- NODE_NAME_CASE(FSUB_RND)
- NODE_NAME_CASE(FSUBS)
- NODE_NAME_CASE(FSUBS_RND)
- NODE_NAME_CASE(FMUL_RND)
- NODE_NAME_CASE(FMULS)
- NODE_NAME_CASE(FMULS_RND)
- NODE_NAME_CASE(FDIV_RND)
- NODE_NAME_CASE(FDIVS)
- NODE_NAME_CASE(FDIVS_RND)
- NODE_NAME_CASE(FSQRT_RND)
- NODE_NAME_CASE(FSQRTS)
- NODE_NAME_CASE(FSQRTS_RND)
- NODE_NAME_CASE(FGETEXP)
- NODE_NAME_CASE(FGETEXP_SAE)
- NODE_NAME_CASE(FGETEXPS)
- NODE_NAME_CASE(FGETEXPS_SAE)
- NODE_NAME_CASE(SCALEF)
- NODE_NAME_CASE(SCALEF_RND)
- NODE_NAME_CASE(SCALEFS)
- NODE_NAME_CASE(SCALEFS_RND)
- NODE_NAME_CASE(MULHRS)
- NODE_NAME_CASE(SINT_TO_FP_RND)
- NODE_NAME_CASE(UINT_TO_FP_RND)
- NODE_NAME_CASE(CVTTP2SI)
- NODE_NAME_CASE(CVTTP2UI)
- NODE_NAME_CASE(STRICT_CVTTP2SI)
- NODE_NAME_CASE(STRICT_CVTTP2UI)
- NODE_NAME_CASE(MCVTTP2SI)
- NODE_NAME_CASE(MCVTTP2UI)
- NODE_NAME_CASE(CVTTP2SI_SAE)
- NODE_NAME_CASE(CVTTP2UI_SAE)
- NODE_NAME_CASE(CVTTS2SI)
- NODE_NAME_CASE(CVTTS2UI)
- NODE_NAME_CASE(CVTTS2SI_SAE)
- NODE_NAME_CASE(CVTTS2UI_SAE)
- NODE_NAME_CASE(CVTSI2P)
- NODE_NAME_CASE(CVTUI2P)
- NODE_NAME_CASE(STRICT_CVTSI2P)
- NODE_NAME_CASE(STRICT_CVTUI2P)
- NODE_NAME_CASE(MCVTSI2P)
- NODE_NAME_CASE(MCVTUI2P)
- NODE_NAME_CASE(VFPCLASS)
- NODE_NAME_CASE(VFPCLASSS)
- NODE_NAME_CASE(MULTISHIFT)
- NODE_NAME_CASE(SCALAR_SINT_TO_FP)
- NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
- NODE_NAME_CASE(SCALAR_UINT_TO_FP)
- NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
- NODE_NAME_CASE(CVTPS2PH)
- NODE_NAME_CASE(STRICT_CVTPS2PH)
- NODE_NAME_CASE(CVTPS2PH_SAE)
- NODE_NAME_CASE(MCVTPS2PH)
- NODE_NAME_CASE(MCVTPS2PH_SAE)
- NODE_NAME_CASE(CVTPH2PS)
- NODE_NAME_CASE(STRICT_CVTPH2PS)
- NODE_NAME_CASE(CVTPH2PS_SAE)
- NODE_NAME_CASE(CVTP2SI)
- NODE_NAME_CASE(CVTP2UI)
- NODE_NAME_CASE(MCVTP2SI)
- NODE_NAME_CASE(MCVTP2UI)
- NODE_NAME_CASE(CVTP2SI_RND)
- NODE_NAME_CASE(CVTP2UI_RND)
- NODE_NAME_CASE(CVTS2SI)
- NODE_NAME_CASE(CVTS2UI)
- NODE_NAME_CASE(CVTS2SI_RND)
- NODE_NAME_CASE(CVTS2UI_RND)
- NODE_NAME_CASE(CVTNEPS2BF16)
- NODE_NAME_CASE(MCVTNEPS2BF16)
- NODE_NAME_CASE(DPBF16PS)
- NODE_NAME_CASE(DPFP16PS)
- NODE_NAME_CASE(MPSADBW)
- NODE_NAME_CASE(LWPINS)
- NODE_NAME_CASE(MGATHER)
- NODE_NAME_CASE(MSCATTER)
- NODE_NAME_CASE(VPDPBUSD)
- NODE_NAME_CASE(VPDPBUSDS)
- NODE_NAME_CASE(VPDPWSSD)
- NODE_NAME_CASE(VPDPWSSDS)
- NODE_NAME_CASE(VPSHUFBITQMB)
- NODE_NAME_CASE(GF2P8MULB)
- NODE_NAME_CASE(GF2P8AFFINEQB)
- NODE_NAME_CASE(GF2P8AFFINEINVQB)
- NODE_NAME_CASE(NT_CALL)
- NODE_NAME_CASE(NT_BRIND)
- NODE_NAME_CASE(UMWAIT)
- NODE_NAME_CASE(TPAUSE)
- NODE_NAME_CASE(ENQCMD)
- NODE_NAME_CASE(ENQCMDS)
- NODE_NAME_CASE(VP2INTERSECT)
- NODE_NAME_CASE(VPDPBSUD)
- NODE_NAME_CASE(VPDPBSUDS)
- NODE_NAME_CASE(VPDPBUUD)
- NODE_NAME_CASE(VPDPBUUDS)
- NODE_NAME_CASE(VPDPBSSD)
- NODE_NAME_CASE(VPDPBSSDS)
- NODE_NAME_CASE(VPDPWSUD)
- NODE_NAME_CASE(VPDPWSUDS)
- NODE_NAME_CASE(VPDPWUSD)
- NODE_NAME_CASE(VPDPWUSDS)
- NODE_NAME_CASE(VPDPWUUD)
- NODE_NAME_CASE(VPDPWUUDS)
- NODE_NAME_CASE(VMINMAX)
- NODE_NAME_CASE(VMINMAX_SAE)
- NODE_NAME_CASE(VMINMAXS)
- NODE_NAME_CASE(VMINMAXS_SAE)
- NODE_NAME_CASE(CVTP2IBS)
- NODE_NAME_CASE(CVTP2IUBS)
- NODE_NAME_CASE(CVTP2IBS_RND)
- NODE_NAME_CASE(CVTP2IUBS_RND)
- NODE_NAME_CASE(CVTTP2IBS)
- NODE_NAME_CASE(CVTTP2IUBS)
- NODE_NAME_CASE(CVTTP2IBS_SAE)
- NODE_NAME_CASE(CVTTP2IUBS_SAE)
- NODE_NAME_CASE(VCVT2PH2BF8)
- NODE_NAME_CASE(VCVT2PH2BF8S)
- NODE_NAME_CASE(VCVT2PH2HF8)
- NODE_NAME_CASE(VCVT2PH2HF8S)
- NODE_NAME_CASE(VCVTBIASPH2BF8)
- NODE_NAME_CASE(VCVTBIASPH2BF8S)
- NODE_NAME_CASE(VCVTBIASPH2HF8)
- NODE_NAME_CASE(VCVTBIASPH2HF8S)
- NODE_NAME_CASE(VCVTPH2BF8)
- NODE_NAME_CASE(VCVTPH2BF8S)
- NODE_NAME_CASE(VCVTPH2HF8)
- NODE_NAME_CASE(VCVTPH2HF8S)
- NODE_NAME_CASE(VMCVTBIASPH2BF8)
- NODE_NAME_CASE(VMCVTBIASPH2BF8S)
- NODE_NAME_CASE(VMCVTBIASPH2HF8)
- NODE_NAME_CASE(VMCVTBIASPH2HF8S)
- NODE_NAME_CASE(VMCVTPH2BF8)
- NODE_NAME_CASE(VMCVTPH2BF8S)
- NODE_NAME_CASE(VMCVTPH2HF8)
- NODE_NAME_CASE(VMCVTPH2HF8S)
- NODE_NAME_CASE(VCVTHF82PH)
- NODE_NAME_CASE(AESENC128KL)
- NODE_NAME_CASE(AESDEC128KL)
- NODE_NAME_CASE(AESENC256KL)
- NODE_NAME_CASE(AESDEC256KL)
- NODE_NAME_CASE(AESENCWIDE128KL)
- NODE_NAME_CASE(AESDECWIDE128KL)
- NODE_NAME_CASE(AESENCWIDE256KL)
- NODE_NAME_CASE(AESDECWIDE256KL)
- NODE_NAME_CASE(CMPCCXADD)
- NODE_NAME_CASE(TESTUI)
- NODE_NAME_CASE(FP80_ADD)
- NODE_NAME_CASE(STRICT_FP80_ADD)
- NODE_NAME_CASE(CCMP)
- NODE_NAME_CASE(CTEST)
- NODE_NAME_CASE(CLOAD)
- NODE_NAME_CASE(CSTORE)
- NODE_NAME_CASE(CVTTS2SIS)
- NODE_NAME_CASE(CVTTS2UIS)
- NODE_NAME_CASE(CVTTS2SIS_SAE)
- NODE_NAME_CASE(CVTTS2UIS_SAE)
- NODE_NAME_CASE(CVTTP2SIS)
- NODE_NAME_CASE(MCVTTP2SIS)
- NODE_NAME_CASE(CVTTP2UIS_SAE)
- NODE_NAME_CASE(CVTTP2SIS_SAE)
- NODE_NAME_CASE(CVTTP2UIS)
- NODE_NAME_CASE(MCVTTP2UIS)
- NODE_NAME_CASE(POP_FROM_X87_REG)
+ case X86ISD::FIRST_NUMBER:
+ break;
+#define NODE_NAME_CASE(NODE) \
+ case X86ISD::NODE: \
+ return "X86ISD::" #NODE;
+ NODE_NAME_CASE(BSF)
+ NODE_NAME_CASE(BSR)
+ NODE_NAME_CASE(FSHL)
+ NODE_NAME_CASE(FSHR)
+ NODE_NAME_CASE(FAND)
+ NODE_NAME_CASE(FANDN)
+ NODE_NAME_CASE(FOR)
+ NODE_NAME_CASE(FXOR)
+ NODE_NAME_CASE(FILD)
+ NODE_NAME_CASE(FIST)
+ NODE_NAME_CASE(FP_TO_INT_IN_MEM)
+ NODE_NAME_CASE(FLD)
+ NODE_NAME_CASE(FST)
+ NODE_NAME_CASE(CALL)
+ NODE_NAME_CASE(CALL_RVMARKER)
+ NODE_NAME_CASE(IMP_CALL)
+ NODE_NAME_CASE(BT)
+ NODE_NAME_CASE(CMP)
+ NODE_NAME_CASE(FCMP)
+ NODE_NAME_CASE(STRICT_FCMP)
+ NODE_NAME_CASE(STRICT_FCMPS)
+ NODE_NAME_CASE(COMI)
+ NODE_NAME_CASE(UCOMI)
+ NODE_NAME_CASE(COMX)
+ NODE_NAME_CASE(UCOMX)
+ NODE_NAME_CASE(CMPM)
+ NODE_NAME_CASE(CMPMM)
+ NODE_NAME_CASE(STRICT_CMPM)
+ NODE_NAME_CASE(CMPMM_SAE)
+ NODE_NAME_CASE(SETCC)
+ NODE_NAME_CASE(SETCC_CARRY)
+ NODE_NAME_CASE(FSETCC)
+ NODE_NAME_CASE(FSETCCM)
+ NODE_NAME_CASE(FSETCCM_SAE)
+ NODE_NAME_CASE(CMOV)
+ NODE_NAME_CASE(BRCOND)
+ NODE_NAME_CASE(RET_GLUE)
+ NODE_NAME_CASE(IRET)
+ NODE_NAME_CASE(REP_STOS)
+ NODE_NAME_CASE(REP_MOVS)
+ NODE_NAME_CASE(GlobalBaseReg)
+ NODE_NAME_CASE(Wrapper)
+ NODE_NAME_CASE(WrapperRIP)
+ NODE_NAME_CASE(MOVQ2DQ)
+ NODE_NAME_CASE(MOVDQ2Q)
+ NODE_NAME_CASE(MMX_MOVD2W)
+ NODE_NAME_CASE(MMX_MOVW2D)
+ NODE_NAME_CASE(PEXTRB)
+ NODE_NAME_CASE(PEXTRW)
+ NODE_NAME_CASE(INSERTPS)
+ NODE_NAME_CASE(PINSRB)
+ NODE_NAME_CASE(PINSRW)
+ NODE_NAME_CASE(PSHUFB)
+ NODE_NAME_CASE(ANDNP)
+ NODE_NAME_CASE(BLENDI)
+ NODE_NAME_CASE(BLENDV)
+ NODE_NAME_CASE(HADD)
+ NODE_NAME_CASE(HSUB)
+ NODE_NAME_CASE(FHADD)
+ NODE_NAME_CASE(FHSUB)
+ NODE_NAME_CASE(CONFLICT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(FMAXS)
+ NODE_NAME_CASE(FMAX_SAE)
+ NODE_NAME_CASE(FMAXS_SAE)
+ NODE_NAME_CASE(STRICT_FMAX)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(FMINS)
+ NODE_NAME_CASE(FMIN_SAE)
+ NODE_NAME_CASE(FMINS_SAE)
+ NODE_NAME_CASE(STRICT_FMIN)
+ NODE_NAME_CASE(FMAXC)
+ NODE_NAME_CASE(FMINC)
+ NODE_NAME_CASE(FRSQRT)
+ NODE_NAME_CASE(FRCP)
+ NODE_NAME_CASE(EXTRQI)
+ NODE_NAME_CASE(INSERTQI)
+ NODE_NAME_CASE(TLSADDR)
+ NODE_NAME_CASE(TLSBASEADDR)
+ NODE_NAME_CASE(TLSCALL)
+ NODE_NAME_CASE(TLSDESC)
+ NODE_NAME_CASE(EH_SJLJ_SETJMP)
+ NODE_NAME_CASE(EH_SJLJ_LONGJMP)
+ NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
+ NODE_NAME_CASE(EH_RETURN)
+ NODE_NAME_CASE(TC_RETURN)
+ NODE_NAME_CASE(FNSTCW16m)
+ NODE_NAME_CASE(FLDCW16m)
+ NODE_NAME_CASE(FNSTENVm)
+ NODE_NAME_CASE(FLDENVm)
+ NODE_NAME_CASE(LCMPXCHG_DAG)
+ NODE_NAME_CASE(LCMPXCHG8_DAG)
+ NODE_NAME_CASE(LCMPXCHG16_DAG)
+ NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
+ NODE_NAME_CASE(LADD)
+ NODE_NAME_CASE(LSUB)
+ NODE_NAME_CASE(LOR)
+ NODE_NAME_CASE(LXOR)
+ NODE_NAME_CASE(LAND)
+ NODE_NAME_CASE(LBTS)
+ NODE_NAME_CASE(LBTC)
+ NODE_NAME_CASE(LBTR)
+ NODE_NAME_CASE(LBTS_RM)
+ NODE_NAME_CASE(LBTC_RM)
+ NODE_NAME_CASE(LBTR_RM)
+ NODE_NAME_CASE(AADD)
+ NODE_NAME_CASE(AOR)
+ NODE_NAME_CASE(AXOR)
+ NODE_NAME_CASE(AAND)
+ NODE_NAME_CASE(VZEXT_MOVL)
+ NODE_NAME_CASE(VZEXT_LOAD)
+ NODE_NAME_CASE(VEXTRACT_STORE)
+ NODE_NAME_CASE(VTRUNC)
+ NODE_NAME_CASE(VTRUNCS)
+ NODE_NAME_CASE(VTRUNCUS)
+ NODE_NAME_CASE(VMTRUNC)
+ NODE_NAME_CASE(VMTRUNCS)
+ NODE_NAME_CASE(VMTRUNCUS)
+ NODE_NAME_CASE(VTRUNCSTORES)
+ NODE_NAME_CASE(VTRUNCSTOREUS)
+ NODE_NAME_CASE(VMTRUNCSTORES)
+ NODE_NAME_CASE(VMTRUNCSTOREUS)
+ NODE_NAME_CASE(VFPEXT)
+ NODE_NAME_CASE(STRICT_VFPEXT)
+ NODE_NAME_CASE(VFPEXT_SAE)
+ NODE_NAME_CASE(VFPEXTS)
+ NODE_NAME_CASE(VFPEXTS_SAE)
+ NODE_NAME_CASE(VFPROUND)
+ NODE_NAME_CASE(VFPROUND2)
+ NODE_NAME_CASE(VFPROUND2_RND)
+ NODE_NAME_CASE(STRICT_VFPROUND)
+ NODE_NAME_CASE(VMFPROUND)
+ NODE_NAME_CASE(VFPROUND_RND)
+ NODE_NAME_CASE(VFPROUNDS)
+ NODE_NAME_CASE(VFPROUNDS_RND)
+ NODE_NAME_CASE(VSHLDQ)
+ NODE_NAME_CASE(VSRLDQ)
+ NODE_NAME_CASE(VSHL)
+ NODE_NAME_CASE(VSRL)
+ NODE_NAME_CASE(VSRA)
+ NODE_NAME_CASE(VSHLI)
+ NODE_NAME_CASE(VSRLI)
+ NODE_NAME_CASE(VSRAI)
+ NODE_NAME_CASE(VSHLV)
+ NODE_NAME_CASE(VSRLV)
+ NODE_NAME_CASE(VSRAV)
+ NODE_NAME_CASE(VROTLI)
+ NODE_NAME_CASE(VROTRI)
+ NODE_NAME_CASE(VPPERM)
+ NODE_NAME_CASE(CMPP)
+ NODE_NAME_CASE(STRICT_CMPP)
+ NODE_NAME_CASE(PCMPEQ)
+ NODE_NAME_CASE(PCMPGT)
+ NODE_NAME_CASE(PHMINPOS)
+ NODE_NAME_CASE(ADD)
+ NODE_NAME_CASE(SUB)
+ NODE_NAME_CASE(ADC)
+ NODE_NAME_CASE(SBB)
+ NODE_NAME_CASE(SMUL)
+ NODE_NAME_CASE(UMUL)
+ NODE_NAME_CASE(OR)
+ NODE_NAME_CASE(XOR)
+ NODE_NAME_CASE(AND)
+ NODE_NAME_CASE(BEXTR)
+ NODE_NAME_CASE(BEXTRI)
+ NODE_NAME_CASE(BZHI)
+ NODE_NAME_CASE(PDEP)
+ NODE_NAME_CASE(PEXT)
+ NODE_NAME_CASE(MUL_IMM)
+ NODE_NAME_CASE(MOVMSK)
+ NODE_NAME_CASE(PTEST)
+ NODE_NAME_CASE(TESTP)
+ NODE_NAME_CASE(KORTEST)
+ NODE_NAME_CASE(KTEST)
+ NODE_NAME_CASE(KADD)
+ NODE_NAME_CASE(KSHIFTL)
+ NODE_NAME_CASE(KSHIFTR)
+ NODE_NAME_CASE(PACKSS)
+ NODE_NAME_CASE(PACKUS)
+ NODE_NAME_CASE(PALIGNR)
+ NODE_NAME_CASE(VALIGN)
+ NODE_NAME_CASE(VSHLD)
+ NODE_NAME_CASE(VSHRD)
+ NODE_NAME_CASE(VSHLDV)
+ NODE_NAME_CASE(VSHRDV)
+ NODE_NAME_CASE(PSHUFD)
+ NODE_NAME_CASE(PSHUFHW)
+ NODE_NAME_CASE(PSHUFLW)
+ NODE_NAME_CASE(SHUFP)
+ NODE_NAME_CASE(SHUF128)
+ NODE_NAME_CASE(MOVLHPS)
+ NODE_NAME_CASE(MOVHLPS)
+ NODE_NAME_CASE(MOVDDUP)
+ NODE_NAME_CASE(MOVSHDUP)
+ NODE_NAME_CASE(MOVSLDUP)
+ NODE_NAME_CASE(MOVSD)
+ NODE_NAME_CASE(MOVSS)
+ NODE_NAME_CASE(MOVSH)
+ NODE_NAME_CASE(UNPCKL)
+ NODE_NAME_CASE(UNPCKH)
+ NODE_NAME_CASE(VBROADCAST)
+ NODE_NAME_CASE(VBROADCAST_LOAD)
+ NODE_NAME_CASE(VBROADCASTM)
+ NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
+ NODE_NAME_CASE(VPERMILPV)
+ NODE_NAME_CASE(VPERMILPI)
+ NODE_NAME_CASE(VPERM2X128)
+ NODE_NAME_CASE(VPERMV)
+ NODE_NAME_CASE(VPERMV3)
+ NODE_NAME_CASE(VPERMI)
+ NODE_NAME_CASE(VPTERNLOG)
+ NODE_NAME_CASE(FP_TO_SINT_SAT)
+ NODE_NAME_CASE(FP_TO_UINT_SAT)
+ NODE_NAME_CASE(VFIXUPIMM)
+ NODE_NAME_CASE(VFIXUPIMM_SAE)
+ NODE_NAME_CASE(VFIXUPIMMS)
+ NODE_NAME_CASE(VFIXUPIMMS_SAE)
+ NODE_NAME_CASE(VRANGE)
+ NODE_NAME_CASE(VRANGE_SAE)
+ NODE_NAME_CASE(VRANGES)
+ NODE_NAME_CASE(VRANGES_SAE)
+ NODE_NAME_CASE(PMULUDQ)
+ NODE_NAME_CASE(PMULDQ)
+ NODE_NAME_CASE(PSADBW)
+ NODE_NAME_CASE(DBPSADBW)
+ NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
+ NODE_NAME_CASE(VAARG_64)
+ NODE_NAME_CASE(VAARG_X32)
+ NODE_NAME_CASE(DYN_ALLOCA)
+ NODE_NAME_CASE(MFENCE)
+ NODE_NAME_CASE(SEG_ALLOCA)
+ NODE_NAME_CASE(PROBED_ALLOCA)
+ NODE_NAME_CASE(RDRAND)
+ NODE_NAME_CASE(RDSEED)
+ NODE_NAME_CASE(RDPKRU)
+ NODE_NAME_CASE(WRPKRU)
+ NODE_NAME_CASE(VPMADDUBSW)
+ NODE_NAME_CASE(VPMADDWD)
+ NODE_NAME_CASE(VPSHA)
+ NODE_NAME_CASE(VPSHL)
+ NODE_NAME_CASE(VPCOM)
+ NODE_NAME_CASE(VPCOMU)
+ NODE_NAME_CASE(VPERMIL2)
+ NODE_NAME_CASE(FMSUB)
+ NODE_NAME_CASE(STRICT_FMSUB)
+ NODE_NAME_CASE(FNMADD)
+ NODE_NAME_CASE(STRICT_FNMADD)
+ NODE_NAME_CASE(FNMSUB)
+ NODE_NAME_CASE(STRICT_FNMSUB)
+ NODE_NAME_CASE(FMADDSUB)
+ NODE_NAME_CASE(FMSUBADD)
+ NODE_NAME_CASE(FMADD_RND)
+ NODE_NAME_CASE(FNMADD_RND)
+ NODE_NAME_CASE(FMSUB_RND)
+ NODE_NAME_CASE(FNMSUB_RND)
+ NODE_NAME_CASE(FMADDSUB_RND)
+ NODE_NAME_CASE(FMSUBADD_RND)
+ NODE_NAME_CASE(VFMADDC)
+ NODE_NAME_CASE(VFMADDC_RND)
+ NODE_NAME_CASE(VFCMADDC)
+ NODE_NAME_CASE(VFCMADDC_RND)
+ NODE_NAME_CASE(VFMULC)
+ NODE_NAME_CASE(VFMULC_RND)
+ NODE_NAME_CASE(VFCMULC)
+ NODE_NAME_CASE(VFCMULC_RND)
+ NODE_NAME_CASE(VFMULCSH)
+ NODE_NAME_CASE(VFMULCSH_RND)
+ NODE_NAME_CASE(VFCMULCSH)
+ NODE_NAME_CASE(VFCMULCSH_RND)
+ NODE_NAME_CASE(VFMADDCSH)
+ NODE_NAME_CASE(VFMADDCSH_RND)
+ NODE_NAME_CASE(VFCMADDCSH)
+ NODE_NAME_CASE(VFCMADDCSH_RND)
+ NODE_NAME_CASE(VPMADD52H)
+ NODE_NAME_CASE(VPMADD52L)
+ NODE_NAME_CASE(VRNDSCALE)
+ NODE_NAME_CASE(STRICT_VRNDSCALE)
+ NODE_NAME_CASE(VRNDSCALE_SAE)
+ NODE_NAME_CASE(VRNDSCALES)
+ NODE_NAME_CASE(VRNDSCALES_SAE)
+ NODE_NAME_CASE(VREDUCE)
+ NODE_NAME_CASE(VREDUCE_SAE)
+ NODE_NAME_CASE(VREDUCES)
+ NODE_NAME_CASE(VREDUCES_SAE)
+ NODE_NAME_CASE(VGETMANT)
+ NODE_NAME_CASE(VGETMANT_SAE)
+ NODE_NAME_CASE(VGETMANTS)
+ NODE_NAME_CASE(VGETMANTS_SAE)
+ NODE_NAME_CASE(PCMPESTR)
+ NODE_NAME_CASE(PCMPISTR)
+ NODE_NAME_CASE(XTEST)
+ NODE_NAME_CASE(COMPRESS)
+ NODE_NAME_CASE(EXPAND)
+ NODE_NAME_CASE(SELECTS)
+ NODE_NAME_CASE(ADDSUB)
+ NODE_NAME_CASE(RCP14)
+ NODE_NAME_CASE(RCP14S)
+ NODE_NAME_CASE(RSQRT14)
+ NODE_NAME_CASE(RSQRT14S)
+ NODE_NAME_CASE(FADD_RND)
+ NODE_NAME_CASE(FADDS)
+ NODE_NAME_CASE(FADDS_RND)
+ NODE_NAME_CASE(FSUB_RND)
+ NODE_NAME_CASE(FSUBS)
+ NODE_NAME_CASE(FSUBS_RND)
+ NODE_NAME_CASE(FMUL_RND)
+ NODE_NAME_CASE(FMULS)
+ NODE_NAME_CASE(FMULS_RND)
+ NODE_NAME_CASE(FDIV_RND)
+ NODE_NAME_CASE(FDIVS)
+ NODE_NAME_CASE(FDIVS_RND)
+ NODE_NAME_CASE(FSQRT_RND)
+ NODE_NAME_CASE(FSQRTS)
+ NODE_NAME_CASE(FSQRTS_RND)
+ NODE_NAME_CASE(FGETEXP)
+ NODE_NAME_CASE(FGETEXP_SAE)
+ NODE_NAME_CASE(FGETEXPS)
+ NODE_NAME_CASE(FGETEXPS_SAE)
+ NODE_NAME_CASE(SCALEF)
+ NODE_NAME_CASE(SCALEF_RND)
+ NODE_NAME_CASE(SCALEFS)
+ NODE_NAME_CASE(SCALEFS_RND)
+ NODE_NAME_CASE(MULHRS)
+ NODE_NAME_CASE(SINT_TO_FP_RND)
+ NODE_NAME_CASE(UINT_TO_FP_RND)
+ NODE_NAME_CASE(CVTTP2SI)
+ NODE_NAME_CASE(CVTTP2UI)
+ NODE_NAME_CASE(STRICT_CVTTP2SI)
+ NODE_NAME_CASE(STRICT_CVTTP2UI)
+ NODE_NAME_CASE(MCVTTP2SI)
+ NODE_NAME_CASE(MCVTTP2UI)
+ NODE_NAME_CASE(CVTTP2SI_SAE)
+ NODE_NAME_CASE(CVTTP2UI_SAE)
+ NODE_NAME_CASE(CVTTS2SI)
+ NODE_NAME_CASE(CVTTS2UI)
+ NODE_NAME_CASE(CVTTS2SI_SAE)
+ NODE_NAME_CASE(CVTTS2UI_SAE)
+ NODE_NAME_CASE(CVTSI2P)
+ NODE_NAME_CASE(CVTUI2P)
+ NODE_NAME_CASE(STRICT_CVTSI2P)
+ NODE_NAME_CASE(STRICT_CVTUI2P)
+ NODE_NAME_CASE(MCVTSI2P)
+ NODE_NAME_CASE(MCVTUI2P)
+ NODE_NAME_CASE(VFPCLASS)
+ NODE_NAME_CASE(VFPCLASSS)
+ NODE_NAME_CASE(MULTISHIFT)
+ NODE_NAME_CASE(SCALAR_SINT_TO_FP)
+ NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
+ NODE_NAME_CASE(SCALAR_UINT_TO_FP)
+ NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
+ NODE_NAME_CASE(CVTPS2PH)
+ NODE_NAME_CASE(STRICT_CVTPS2PH)
+ NODE_NAME_CASE(CVTPS2PH_SAE)
+ NODE_NAME_CASE(MCVTPS2PH)
+ NODE_NAME_CASE(MCVTPS2PH_SAE)
+ NODE_NAME_CASE(CVTPH2PS)
+ NODE_NAME_CASE(STRICT_CVTPH2PS)
+ NODE_NAME_CASE(CVTPH2PS_SAE)
+ NODE_NAME_CASE(CVTP2SI)
+ NODE_NAME_CASE(CVTP2UI)
+ NODE_NAME_CASE(MCVTP2SI)
+ NODE_NAME_CASE(MCVTP2UI)
+ NODE_NAME_CASE(CVTP2SI_RND)
+ NODE_NAME_CASE(CVTP2UI_RND)
+ NODE_NAME_CASE(CVTS2SI)
+ NODE_NAME_CASE(CVTS2UI)
+ NODE_NAME_CASE(CVTS2SI_RND)
+ NODE_NAME_CASE(CVTS2UI_RND)
+ NODE_NAME_CASE(CVTNEPS2BF16)
+ NODE_NAME_CASE(MCVTNEPS2BF16)
+ NODE_NAME_CASE(DPBF16PS)
+ NODE_NAME_CASE(DPFP16PS)
+ NODE_NAME_CASE(MPSADBW)
+ NODE_NAME_CASE(LWPINS)
+ NODE_NAME_CASE(MGATHER)
+ NODE_NAME_CASE(MSCATTER)
+ NODE_NAME_CASE(VPDPBUSD)
+ NODE_NAME_CASE(VPDPBUSDS)
+ NODE_NAME_CASE(VPDPWSSD)
+ NODE_NAME_CASE(VPDPWSSDS)
+ NODE_NAME_CASE(VPSHUFBITQMB)
+ NODE_NAME_CASE(GF2P8MULB)
+ NODE_NAME_CASE(GF2P8AFFINEQB)
+ NODE_NAME_CASE(GF2P8AFFINEINVQB)
+ NODE_NAME_CASE(NT_CALL)
+ NODE_NAME_CASE(NT_BRIND)
+ NODE_NAME_CASE(UMWAIT)
+ NODE_NAME_CASE(TPAUSE)
+ NODE_NAME_CASE(ENQCMD)
+ NODE_NAME_CASE(ENQCMDS)
+ NODE_NAME_CASE(VP2INTERSECT)
+ NODE_NAME_CASE(VPDPBSUD)
+ NODE_NAME_CASE(VPDPBSUDS)
+ NODE_NAME_CASE(VPDPBUUD)
+ NODE_NAME_CASE(VPDPBUUDS)
+ NODE_NAME_CASE(VPDPBSSD)
+ NODE_NAME_CASE(VPDPBSSDS)
+ NODE_NAME_CASE(VPDPWSUD)
+ NODE_NAME_CASE(VPDPWSUDS)
+ NODE_NAME_CASE(VPDPWUSD)
+ NODE_NAME_CASE(VPDPWUSDS)
+ NODE_NAME_CASE(VPDPWUUD)
+ NODE_NAME_CASE(VPDPWUUDS)
+ NODE_NAME_CASE(VMINMAX)
+ NODE_NAME_CASE(VMINMAX_SAE)
+ NODE_NAME_CASE(VMINMAXS)
+ NODE_NAME_CASE(VMINMAXS_SAE)
+ NODE_NAME_CASE(CVTP2IBS)
+ NODE_NAME_CASE(CVTP2IUBS)
+ NODE_NAME_CASE(CVTP2IBS_RND)
+ NODE_NAME_CASE(CVTP2IUBS_RND)
+ NODE_NAME_CASE(CVTTP2IBS)
+ NODE_NAME_CASE(CVTTP2IUBS)
+ NODE_NAME_CASE(CVTTP2IBS_SAE)
+ NODE_NAME_CASE(CVTTP2IUBS_SAE)
+ NODE_NAME_CASE(VCVT2PH2BF8)
+ NODE_NAME_CASE(VCVT2PH2BF8S)
+ NODE_NAME_CASE(VCVT2PH2HF8)
+ NODE_NAME_CASE(VCVT2PH2HF8S)
+ NODE_NAME_CASE(VCVTBIASPH2BF8)
+ NODE_NAME_CASE(VCVTBIASPH2BF8S)
+ NODE_NAME_CASE(VCVTBIASPH2HF8)
+ NODE_NAME_CASE(VCVTBIASPH2HF8S)
+ NODE_NAME_CASE(VCVTPH2BF8)
+ NODE_NAME_CASE(VCVTPH2BF8S)
+ NODE_NAME_CASE(VCVTPH2HF8)
+ NODE_NAME_CASE(VCVTPH2HF8S)
+ NODE_NAME_CASE(VMCVTBIASPH2BF8)
+ NODE_NAME_CASE(VMCVTBIASPH2BF8S)
+ NODE_NAME_CASE(VMCVTBIASPH2HF8)
+ NODE_NAME_CASE(VMCVTBIASPH2HF8S)
+ NODE_NAME_CASE(VMCVTPH2BF8)
+ NODE_NAME_CASE(VMCVTPH2BF8S)
+ NODE_NAME_CASE(VMCVTPH2HF8)
+ NODE_NAME_CASE(VMCVTPH2HF8S)
+ NODE_NAME_CASE(VCVTHF82PH)
+ NODE_NAME_CASE(AESENC128KL)
+ NODE_NAME_CASE(AESDEC128KL)
+ NODE_NAME_CASE(AESENC256KL)
+ NODE_NAME_CASE(AESDEC256KL)
+ NODE_NAME_CASE(AESENCWIDE128KL)
+ NODE_NAME_CASE(AESDECWIDE128KL)
+ NODE_NAME_CASE(AESENCWIDE256KL)
+ NODE_NAME_CASE(AESDECWIDE256KL)
+ NODE_NAME_CASE(CMPCCXADD)
+ NODE_NAME_CASE(TESTUI)
+ NODE_NAME_CASE(FP80_ADD)
+ NODE_NAME_CASE(STRICT_FP80_ADD)
+ NODE_NAME_CASE(CCMP)
+ NODE_NAME_CASE(CTEST)
+ NODE_NAME_CASE(CLOAD)
+ NODE_NAME_CASE(CSTORE)
+ NODE_NAME_CASE(CVTTS2SIS)
+ NODE_NAME_CASE(CVTTS2UIS)
+ NODE_NAME_CASE(CVTTS2SIS_SAE)
+ NODE_NAME_CASE(CVTTS2UIS_SAE)
+ NODE_NAME_CASE(CVTTP2SIS)
+ NODE_NAME_CASE(MCVTTP2SIS)
+ NODE_NAME_CASE(CVTTP2UIS_SAE)
+ NODE_NAME_CASE(CVTTP2SIS_SAE)
+ NODE_NAME_CASE(CVTTP2UIS)
+ NODE_NAME_CASE(MCVTTP2UIS)
+ NODE_NAME_CASE(POP_FROM_X87_REG)
}
return nullptr;
#undef NODE_NAME_CASE
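
The block above is the usual X-macro pattern: NODE_NAME_CASE expands each opcode into a case that returns its stringized name, and the reformatting only changes where the case labels break. A self-contained miniature of the same pattern:

#include <cstdio>

enum class Op { Add, Sub, Mul };

const char *opName(Op O) {
  switch (O) {
#define OP_NAME_CASE(NODE) \
  case Op::NODE: \
    return "Op::" #NODE;
    OP_NAME_CASE(Add)
    OP_NAME_CASE(Sub)
    OP_NAME_CASE(Mul)
#undef OP_NAME_CASE
  }
  return nullptr;
}

int main() {
  printf("%s\n", opName(Op::Mul)); // prints Op::Mul
  return 0;
}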
@@ -35377,7 +35422,7 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.HasBaseReg)
return false;
break;
- default: // Other stuff never works.
+ default: // Other stuff never works.
return false;
}
@@ -35482,12 +35527,13 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
if (Val.getOpcode() != ISD::LOAD)
return false;
- if (!VT1.isSimple() || !VT1.isInteger() ||
- !VT2.isSimple() || !VT2.isInteger())
+ if (!VT1.isSimple() || !VT1.isInteger() || !VT2.isSimple() ||
+ !VT2.isInteger())
return false;
switch (VT1.getSimpleVT().SimpleTy) {
- default: break;
+ default:
+ break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
@@ -35694,8 +35740,10 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
// sinkMBB:
// DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB)
BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
- .addReg(mainDstReg).addMBB(mainMBB)
- .addReg(fallDstReg).addMBB(fallMBB);
+ .addReg(mainDstReg)
+ .addMBB(mainMBB)
+ .addReg(fallDstReg)
+ .addMBB(fallMBB);
MI.eraseFromParent();
return sinkMBB;
@@ -35761,8 +35809,8 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
unsigned TotalNumXMMRegs = 8;
bool UseGPOffset = (ArgMode == 1);
bool UseFPOffset = (ArgMode == 2);
- unsigned MaxOffset = TotalNumIntRegs * 8 +
- (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+ unsigned MaxOffset =
+ TotalNumIntRegs * 8 + (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
/* Align ArgSize to a multiple of 8 */
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
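
The constants above encode the SysV x86-64 register save area: six integer registers at 8 bytes each plus eight XMM registers at 16 bytes each, so the offset limit is 48 for GP arguments and 48 + 128 = 176 for FP arguments. A small sketch of the bound check the emitted CMP32ri/JCC pair performs (struct fields follow the psABI va_list; the helper itself is illustrative):

#include <cstdio>

// SysV x86-64 va_list, as laid out by the psABI.
struct VaList {
  unsigned gp_offset;      // 0 .. 48
  unsigned fp_offset;      // 48 .. 176
  void *overflow_arg_area; // stack arguments
  void *reg_save_area;     // 6*8 + 8*16 = 176 bytes
};

bool fitsInRegisterSaveArea(const VaList &VL, bool IsFP, unsigned ArgSize) {
  unsigned MaxOffset = 6 * 8 + (IsFP ? 8 * 16 : 0);
  unsigned ArgSizeA8 = (ArgSize + 7) & ~7u; // align to 8
  unsigned Offset = IsFP ? VL.fp_offset : VL.gp_offset;
  // Mirrors the CMP32ri against MaxOffset + 8 - ArgSizeA8 and the branch on
  // COND_AE: the register save area is used only while the offset is below it.
  return Offset < MaxOffset + 8 - ArgSizeA8;
}

int main() {
  VaList VL{40, 160, nullptr, nullptr};
  printf("%d %d\n", fitsInRegisterSaveArea(VL, false, 8),
         fitsInRegisterSaveArea(VL, true, 8)); // 1 1
  return 0;
}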
@@ -35840,13 +35888,14 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, MIMD, TII->get(X86::CMP32ri))
- .addReg(OffsetReg)
- .addImm(MaxOffset + 8 - ArgSizeA8);
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
BuildMI(thisMBB, MIMD, TII->get(X86::JCC_1))
- .addMBB(overflowMBB).addImm(X86::COND_AE);
+ .addMBB(overflowMBB)
+ .addImm(X86::COND_AE);
}
// In offsetMBB, emit code to use the reg_save_area.
@@ -35888,8 +35937,8 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Compute the offset for the next argument
Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32ri), NextOffsetReg)
- .addReg(OffsetReg)
- .addImm(UseFPOffset ? 16 : 8);
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
BuildMI(offsetMBB, MIMD, TII->get(X86::MOV32mr))
@@ -35902,8 +35951,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
.setMemRefs(StoreOnlyMMO);
// Jump to endMBB
- BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1))
- .addMBB(endMBB);
+ BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1)).addMBB(endMBB);
}
//
@@ -35944,7 +35992,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
.addImm(~(uint64_t)(Alignment.value() - 1));
} else {
BuildMI(overflowMBB, MIMD, TII->get(TargetOpcode::COPY), OverflowDestReg)
- .addReg(OverflowAddrReg);
+ .addReg(OverflowAddrReg);
}
// Compute the next overflow address after this argument.
@@ -35970,10 +36018,11 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
- BuildMI(*endMBB, endMBB->begin(), MIMD,
- TII->get(X86::PHI), DestReg)
- .addReg(OffsetDestReg).addMBB(offsetMBB)
- .addReg(OverflowDestReg).addMBB(overflowMBB);
+ BuildMI(*endMBB, endMBB->begin(), MIMD, TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg)
+ .addMBB(offsetMBB)
+ .addReg(OverflowDestReg)
+ .addMBB(overflowMBB);
}
// Erase the pseudo instruction
@@ -35988,8 +36037,8 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// kill marker, and set it if it should. Returns the correct kill
// marker value.
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
- MachineBasicBlock* BB,
- const TargetRegisterInfo* TRI) {
+ MachineBasicBlock *BB,
+ const TargetRegisterInfo *TRI) {
if (isPhysRegUsedAfter(X86::EFLAGS, SelectItr))
return false;
@@ -36456,11 +36505,21 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
//
// + ---- <- ------------ <- ------------- <- ------------ +
// | |
- // [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] + -> [dyn probe] -> [page alloc] -> [dyn probe] -> [tail alloc] +
- // | |
- // + <- ----------- <- ------------ <- ----------- <- ------------ +
+ // [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] + -> [dyn
+ // probe] -> [page alloc] -> [dyn probe] -> [tail alloc] +
+ // | |
+ // + <-
+ // -----------
+ // <-
+ // ------------
+ // <-
+ // -----------
+ // <-
+ // ------------
+ // +
//
- // The property we want to enforce is to never have more than [page alloc] between two probes.
+ // The property we want to enforce is to never have more than [page alloc]
+ // between two probes.
const unsigned XORMIOpc =
TFI.Uses64BitFramePtr ? X86::XOR64mi32 : X86::XOR32mi;
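
The invariant in the comment, never more than one page of allocation between two probes, is what the emitted loop enforces by touching the stack with an idempotent XOR once per page. A freestanding sketch of that probing pattern, assuming a 4 KiB probe size (the real lowering asks the target for it):

#include <cstddef>
#include <cstring>

// Touch the new stack area one page at a time so a guard page is always hit
// before more than PageSize bytes beyond the last probe are in use.
void *probed_alloca(std::size_t Size, char *SP) {
  const std::size_t PageSize = 4096; // assumed probe size
  while (Size > PageSize) {
    SP -= PageSize;
    *reinterpret_cast<volatile unsigned char *>(SP) = 0; // probe this page
    Size -= PageSize;
  }
  SP -= Size;
  *reinterpret_cast<volatile unsigned char *>(SP) = 0; // probe the tail
  return SP;
}

int main() {
  static char Stack[64 * 1024];
  void *P = probed_alloca(20000, Stack + sizeof(Stack));
  std::memset(P, 0, 20000);
  return 0;
}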
@@ -36553,56 +36612,61 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, MIMD, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
- BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
- .addReg(tmpSPVReg).addReg(sizeVReg);
- BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
- .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
- .addReg(SPLimitVReg);
+ BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::SUB64rr : X86::SUB32rr), SPLimitVReg)
+ .addReg(tmpSPVReg)
+ .addReg(sizeVReg);
+ BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::CMP64mr : X86::CMP32mr))
+ .addReg(0)
+ .addImm(1)
+ .addReg(0)
+ .addImm(TlsOffset)
+ .addReg(TlsReg)
+ .addReg(SPLimitVReg);
BuildMI(BB, MIMD, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), physSPReg)
- .addReg(SPLimitVReg);
+ .addReg(SPLimitVReg);
BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
- .addReg(SPLimitVReg);
+ .addReg(SPLimitVReg);
BuildMI(bumpMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
- BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI)
- .addReg(sizeVReg);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI).addReg(sizeVReg);
BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space")
- .addRegMask(RegMask)
- .addReg(X86::RDI, RegState::Implicit)
- .addReg(X86::RAX, RegState::ImplicitDefine);
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::RDI, RegState::Implicit)
+ .addReg(X86::RAX, RegState::ImplicitDefine);
} else if (Is64Bit) {
- BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI)
- .addReg(sizeVReg);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI).addReg(sizeVReg);
BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space")
- .addRegMask(RegMask)
- .addReg(X86::EDI, RegState::Implicit)
- .addReg(X86::EAX, RegState::ImplicitDefine);
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EDI, RegState::Implicit)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
} else {
- BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
- .addImm(12);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg)
+ .addReg(physSPReg)
+ .addImm(12);
BuildMI(mallocMBB, MIMD, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, MIMD, TII->get(X86::CALLpcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space")
- .addRegMask(RegMask)
- .addReg(X86::EAX, RegState::ImplicitDefine);
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
- BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
- .addImm(16);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg)
+ .addReg(physSPReg)
+ .addImm(16);
BuildMI(mallocMBB, MIMD, TII->get(TargetOpcode::COPY), mallocPtrVReg)
- .addReg(IsLP64 ? X86::RAX : X86::EAX);
+ .addReg(IsLP64 ? X86::RAX : X86::EAX);
BuildMI(mallocMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Set up the CFG correctly.
@@ -36657,7 +36721,8 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
RestoreMBB->setIsEHPad(true);
auto RestoreMBBI = RestoreMBB->begin();
- BuildMI(*RestoreMBB, RestoreMBBI, MIMD, TII.get(X86::JMP_4)).addMBB(TargetMBB);
+ BuildMI(*RestoreMBB, RestoreMBBI, MIMD, TII.get(X86::JMP_4))
+ .addMBB(TargetMBB);
return BB;
}
@@ -36679,9 +36744,10 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
- Subtarget.is64Bit() ?
- Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
- Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
+ Subtarget.is64Bit()
+ ? Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask()
+ : Subtarget.getRegisterInfo()->getCallPreservedMask(*F,
+ CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI)
@@ -36937,8 +37003,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MemOpndSlot = CurOp;
MVT PVT = getPointerTy(MF->getDataLayout());
- assert((PVT == MVT::i64 || PVT == MVT::i32) &&
- "Invalid Pointer Size!");
+ assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
// For v = setjmp(buf), we generate
//
@@ -36986,19 +37051,19 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget.is64Bit()) {
MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg)
- .addReg(X86::RIP)
- .addImm(0)
- .addReg(0)
- .addMBB(restoreMBB)
- .addReg(0);
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB)
+ .addReg(0);
} else {
- const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+ const X86InstrInfo *XII = static_cast<const X86InstrInfo *>(TII);
MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg)
- .addReg(XII->getGlobalBaseReg(MF))
- .addImm(0)
- .addReg(0)
- .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
- .addReg(0);
+ .addReg(XII->getGlobalBaseReg(MF))
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
+ .addReg(0);
}
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
@@ -37022,7 +37087,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
// Setup
MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
- .addMBB(restoreMBB);
+ .addMBB(restoreMBB);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
@@ -37050,9 +37115,9 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
Register FramePtr = RegInfo->getFrameRegister(*MF);
Register BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
- addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr),
- FramePtr, true, X86FI->getRestoreBasePointerOffset())
- .setMIFlag(MachineInstr::FrameSetup);
+ addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr), FramePtr,
+ true, X86FI->getRestoreBasePointerOffset())
+ .setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
BuildMI(restoreMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);
@@ -37135,9 +37200,9 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
if (PVT == MVT::i64) {
Register TmpZReg = MRI.createVirtualRegister(PtrRC);
BuildMI(checkSspMBB, MIMD, TII->get(X86::SUBREG_TO_REG), TmpZReg)
- .addImm(0)
- .addReg(ZReg)
- .addImm(X86::sub_32bit);
+ .addImm(0)
+ .addReg(ZReg)
+ .addImm(X86::sub_32bit);
ZReg = TmpZReg;
}
@@ -37268,11 +37333,10 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
MVT PVT = getPointerTy(MF->getDataLayout());
- assert((PVT == MVT::i64 || PVT == MVT::i32) &&
- "Invalid Pointer Size!");
+ assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
const TargetRegisterClass *RC =
- (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
+ (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
Register Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -37654,7 +37718,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
const MIMetadata MIMD(MI);
auto TMMImmToTMMReg = [](unsigned Imm) {
- assert (Imm < 8 && "Illegal tmm index");
+ assert(Imm < 8 && "Illegal tmm index");
return X86::TMM0 + Imm;
};
auto TMMImmToTMMPair = [](unsigned Imm) {
@@ -37794,29 +37858,30 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero.
Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW)
- .addReg(OldCW, RegState::Kill).addImm(0xC00);
+ .addReg(OldCW, RegState::Kill)
+ .addImm(0xC00);
// Extract to 16 bits.
Register NewCW16 =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)
- .addReg(NewCW, RegState::Kill, X86::sub_16bit);
+ .addReg(NewCW, RegState::Kill, X86::sub_16bit);
// Prepare memory for FLDCW.
int NewCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)),
NewCWFrameIdx)
- .addReg(NewCW16, RegState::Kill);
+ .addReg(NewCW16, RegState::Kill);
// Reload the modified control word now...
- addFrameReference(BuildMI(*BB, MI, MIMD,
- TII->get(X86::FLDCW16m)), NewCWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),
+ NewCWFrameIdx);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI.getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
@@ -37827,7 +37892,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
- // clang-format on
+ // clang-format on
}
X86AddressMode AM = getAddressFromInstr(&MI, 0);
@@ -38050,23 +38115,44 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTTMMULTF32PS: {
unsigned Opc;
switch (MI.getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
- case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
- case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
- case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
- case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
- case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
+ default:
+ llvm_unreachable("illegal opcode!");
+ case X86::PTDPBSSD:
+ Opc = X86::TDPBSSD;
+ break;
+ case X86::PTDPBSUD:
+ Opc = X86::TDPBSUD;
+ break;
+ case X86::PTDPBUSD:
+ Opc = X86::TDPBUSD;
+ break;
+ case X86::PTDPBUUD:
+ Opc = X86::TDPBUUD;
+ break;
+ case X86::PTDPBF16PS:
+ Opc = X86::TDPBF16PS;
+ break;
+ case X86::PTDPFP16PS:
+ Opc = X86::TDPFP16PS;
+ break;
case X86::PTCMMIMFP16PS:
Opc = X86::TCMMIMFP16PS;
break;
case X86::PTCMMRLFP16PS:
Opc = X86::TCMMRLFP16PS;
break;
- case X86::PTDPBF8PS: Opc = X86::TDPBF8PS; break;
- case X86::PTDPBHF8PS: Opc = X86::TDPBHF8PS; break;
- case X86::PTDPHBF8PS: Opc = X86::TDPHBF8PS; break;
- case X86::PTDPHF8PS: Opc = X86::TDPHF8PS; break;
+ case X86::PTDPBF8PS:
+ Opc = X86::TDPBF8PS;
+ break;
+ case X86::PTDPBHF8PS:
+ Opc = X86::TDPBHF8PS;
+ break;
+ case X86::PTDPHBF8PS:
+ Opc = X86::TDPHBF8PS;
+ break;
+ case X86::PTDPHF8PS:
+ Opc = X86::TDPHF8PS;
+ break;
case X86::PTTDPBF16PS:
Opc = X86::TTDPBF16PS;
break;
@@ -38119,7 +38205,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED: {
unsigned Opc;
switch (MI.getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
+ default:
+ llvm_unreachable("illegal opcode!");
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
case X86::PTILELOADD:
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
@@ -38305,11 +38392,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
-bool
-X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
- const APInt &DemandedBits,
- const APInt &DemandedElts,
- TargetLoweringOpt &TLO) const {
+bool X86TargetLowering::targetShrinkDemandedConstant(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
unsigned EltSize = VT.getScalarSizeInBits();
@@ -38494,16 +38579,15 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
unsigned NumElts = DemandedElts.getBitWidth();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
- assert((Opc >= ISD::BUILTIN_OP_END ||
- Opc == ISD::INTRINSIC_WO_CHAIN ||
- Opc == ISD::INTRINSIC_W_CHAIN ||
- Opc == ISD::INTRINSIC_VOID) &&
+ assert((Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN || Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
switch (Opc) {
- default: break;
+ default:
+ break;
case X86ISD::MUL_IMM: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -38734,7 +38818,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
+ if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
@@ -38908,7 +38992,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
if (Mask.size() == NumElts) {
SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
- Known.Zero.setAllBits(); Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
@@ -39053,16 +39138,18 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
case X86ISD::ANDNP: {
unsigned Tmp0 =
DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
- if (Tmp0 == 1) return 1; // Early out.
+ if (Tmp0 == 1)
+ return 1; // Early out.
unsigned Tmp1 =
DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
return std::min(Tmp0, Tmp1);
}
case X86ISD::CMOV: {
- unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp0 == 1) return 1; // Early out.
- unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp0 == 1)
+ return 1; // Early out.
+ unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
return std::min(Tmp0, Tmp1);
}
}
@@ -39438,7 +39525,6 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
PermuteImm = (unsigned)ShiftAmt;
return true;
}
-
}
}
@@ -39498,7 +39584,8 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Attempt to match against either an unary or binary PACKSS/PACKUS shuffle.
if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
- ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
+ ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) &&
+ Subtarget.hasInt256()) ||
((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
if (matchShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
Subtarget)) {
@@ -40057,9 +40144,9 @@ static SDValue combineX86ShuffleChain(
SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
- CanonicalizeShuffleInput(RootVT, LHS),
- CanonicalizeShuffleInput(RootVT, RHS),
- DAG.getTargetConstant(PermMask, DL, MVT::i8));
+ CanonicalizeShuffleInput(RootVT, LHS),
+ CanonicalizeShuffleInput(RootVT, RHS),
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
}
}
@@ -40153,8 +40240,8 @@ static SDValue combineX86ShuffleChain(
}
if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
- AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT,
- PermuteImm) &&
+ AllowIntDomain, DAG, Subtarget, Shuffle,
+ ShuffleVT, PermuteImm) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && RootOpc == Shuffle)
@@ -41032,11 +41119,9 @@ static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
}
namespace llvm {
- namespace X86 {
- enum {
- MaxShuffleCombineDepth = 8
- };
- } // namespace X86
+namespace X86 {
+enum { MaxShuffleCombineDepth = 8 };
+} // namespace X86
} // namespace llvm
/// Fully generic combining of x86 shuffle instructions.
@@ -41440,7 +41525,8 @@ static SDValue combineX86ShufflesRecursively(
// The Op itself may be of different VT, so we need to scale the mask.
unsigned NumOpElts = Op.getValueType().getVectorNumElements();
- APInt OpScaledDemandedElts = APIntOps::ScaleBitMask(OpDemandedElts, NumOpElts);
+ APInt OpScaledDemandedElts =
+ APIntOps::ScaleBitMask(OpDemandedElts, NumOpElts);
// Can this operand be simplified any further, given it's demanded elements?
if (SDValue NewOp = TLI.SimplifyMultipleUseDemandedVectorElts(
@@ -42239,7 +42325,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
ISD::isNormalLoad(Src.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
@@ -42271,7 +42357,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
// Unless its volatile or atomic.
if (LN->isSimple()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
LN->getPointerInfo(), LN->getBaseAlign(),
@@ -42289,7 +42375,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
if (LN->getMemoryVT().getSizeInBits() == 16) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
@@ -42316,7 +42402,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ptr = DAG.getMemBasePlusOffset(
LN->getBasePtr(), TypeSize::getFixed(Offset), DL);
- SDValue Ops[] = { LN->getChain(), Ptr };
+ SDValue Ops[] = {LN->getChain(), Ptr};
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
LN->getPointerInfo().getWithOffset(Offset), LN->getBaseAlign(),
@@ -42334,7 +42420,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
@@ -42368,20 +42454,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
case X86ISD::VZEXT_MOVL: {
SDValue N0 = N.getOperand(0);
- // Fold (vzmovl (shift x, y)) -> (shift (vzmovl x), y)
- // Zeroing out the upper elements means we're just shifting a zero value.
- // TODO: Try harder to move vzmovl upward towards SCALAR_TO_VECTOR nodes.
- // TODO: Move this to canonicalizeShuffleWithOp once we add zero handling.
- if (N0.getOpcode() == X86ISD::VSHL || N0.getOpcode() == X86ISD::VSHLI ||
- N0.getOpcode() == X86ISD::VSRL || N0.getOpcode() == X86ISD::VSRLI ||
- N0.getOpcode() == X86ISD::VSRA || N0.getOpcode() == X86ISD::VSRAI) {
- if (N0.hasOneUse())
- return DAG.getNode(
- N0.getOpcode(), DL, VT,
- DAG.getNode(X86ISD::VZEXT_MOVL, DL, VT, N0.getOperand(0)),
- N0.getOperand(1));
- }
-
// If this a vzmovl of a full vector load, replace it with a vzload, unless
// the load is volatile.
if (N0.hasOneUse() && ISD::isNormalLoad(N0.getNode())) {
@@ -42816,13 +42888,13 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op1);
if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {
- SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
- MemIntr->getBasePtr(),
- MemIntr->getMemOperand());
- SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
- Load),
- DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
+ SDValue Load =
+ DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),
+ MemIntr->getBasePtr(), MemIntr->getMemOperand());
+ SDValue Insert = DAG.getNode(
+ X86ISD::INSERTPS, DL, VT, Op0,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Load),
+ DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
return Insert;
}
@@ -42976,8 +43048,8 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
(V.getOpcode() == X86ISD::PSHUFLW ||
V.getOpcode() == X86ISD::PSHUFHW) &&
- V.getOpcode() != N.getOpcode() &&
- V.hasOneUse() && V.getOperand(0).hasOneUse()) {
+ V.getOpcode() != N.getOpcode() && V.hasOneUse() &&
+ V.getOperand(0).hasOneUse()) {
SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
if (D.getOpcode() == X86ISD::PSHUFD) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
@@ -43051,11 +43123,11 @@ static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) {
/// operation. If true is returned then the operands of ADDSUB(SUBADD) operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
///
-/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle nodes
-/// so it is easier to generically match. We also insert dummy vector shuffle
-/// nodes for the operands which explicitly discard the lanes which are unused
-/// by this operation to try to flow through the rest of the combiner
-/// the fact that they're unused.
+/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle
+/// nodes so it is easier to generically match. We also insert dummy vector
+/// shuffle nodes for the operands which explicitly discard the lanes which are
+/// unused by this operation to try to flow through the rest of the combiner the
+/// fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
bool &IsSubAdd) {
@@ -43089,13 +43161,15 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
// commute the FADD operands.
SDValue LHS, RHS;
if (V1.getOpcode() == ISD::FSUB) {
- LHS = V1->getOperand(0); RHS = V1->getOperand(1);
+ LHS = V1->getOperand(0);
+ RHS = V1->getOperand(1);
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
return false;
} else {
assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode");
- LHS = V2->getOperand(0); RHS = V2->getOperand(1);
+ LHS = V2->getOperand(0);
+ RHS = V2->getOperand(1);
if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) &&
(V1->getOperand(0) != RHS || V1->getOperand(1) != LHS))
return false;
@@ -43107,8 +43181,8 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
return false;
// It's a subadd if the vector in the even parity is an FADD.
- IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
- : V2->getOpcode() == ISD::FADD;
+ IsSubAdd =
+ Op0Even ? V1->getOpcode() == ISD::FADD : V2->getOpcode() == ISD::FADD;
Opnd0 = LHS;
Opnd1 = RHS;
@@ -43446,7 +43520,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// TODO: Multiply by zero.
- // If RHS/LHS elements are known zero then we don't need the LHS/RHS equivalent.
+ // If RHS/LHS elements are known zero then we don't need the LHS/RHS
+ // equivalent.
APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero;
if (SimplifyDemandedVectorElts(LHS, DemandedLHSElts, LHSUndef, LHSZero, TLO,
Depth + 1))
@@ -44206,7 +44281,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// For splats, unless we *only* demand the 0'th element,
// stop attempts at simplification here, we aren't going to improve things,
// this is better than any potential shuffle.
- if (!DemandedElts.isOne() && TLO.DAG.isSplatValue(Op, /*AllowUndefs*/false))
+ if (!DemandedElts.isOne() && TLO.DAG.isSplatValue(Op, /*AllowUndefs*/ false))
return false;
// Get target/faux shuffle mask.
@@ -44303,7 +44378,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
EVT VT = Op.getValueType();
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
unsigned Opc = Op.getOpcode();
- switch(Opc) {
+ switch (Opc) {
case X86ISD::VTRUNC: {
KnownBits KnownOp;
SDValue Src = Op.getOperand(0);
@@ -44311,8 +44386,10 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// Simplify the input, using demanded bit information.
APInt TruncMask = OriginalDemandedBits.zext(SrcVT.getScalarSizeInBits());
- APInt DemandedElts = OriginalDemandedElts.trunc(SrcVT.getVectorNumElements());
- if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO, Depth + 1))
+ APInt DemandedElts =
+ OriginalDemandedElts.trunc(SrcVT.getVectorNumElements());
+ if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO,
+ Depth + 1))
return true;
break;
}
@@ -44416,7 +44493,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
}
- // If we are only demanding sign bits then we can use the shift source directly.
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - OriginalDemandedBits.countr_zero();
@@ -44607,8 +44685,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
return true;
KnownBits KnownVec;
- if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts,
- KnownVec, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, KnownVec,
+ TLO, Depth + 1))
return true;
if (SDValue V = SimplifyMultipleUseDemandedBits(
@@ -45145,13 +45223,13 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
// Helper to flip between AND/OR/XOR opcodes and their X86ISD FP equivalents.
static unsigned getAltBitOpcode(unsigned Opcode) {
- switch(Opcode) {
- // clang-format off
+ switch (Opcode) {
+ // clang-format off
case ISD::AND: return X86ISD::FAND;
case ISD::OR: return X86ISD::FOR;
case ISD::XOR: return X86ISD::FXOR;
case X86ISD::ANDNP: return X86ISD::FANDN;
- // clang-format on
+ // clang-format on
}
llvm_unreachable("Unknown bitwise opcode");
}
@@ -45373,8 +45451,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
// Convert a vXi1 constant build vector to the same width scalar integer.
static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) {
EVT SrcVT = Op.getValueType();
- assert(SrcVT.getVectorElementType() == MVT::i1 &&
- "Expected a vXi1 vector");
+ assert(SrcVT.getVectorElementType() == MVT::i1 && "Expected a vXi1 vector");
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
"Expected a constant build vector");
@@ -45408,8 +45485,7 @@ static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Look for logic ops.
- if (Op.getOpcode() != ISD::AND &&
- Op.getOpcode() != ISD::OR &&
+ if (Op.getOpcode() != ISD::AND && Op.getOpcode() != ISD::OR &&
Op.getOpcode() != ISD::XOR)
return SDValue();
@@ -45700,7 +45776,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// and the vbroadcast_load are both integer or both fp. In some cases this
// will remove the bitcast entirely.
if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() &&
- VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) {
+ VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) {
auto *BCast = cast<MemIntrinsicSDNode>(N0);
unsigned SrcVTSize = SrcVT.getScalarSizeInBits();
unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits();
@@ -45713,7 +45789,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
LoadVT = MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements());
SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
- SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
+ SDValue Ops[] = {BCast->getChain(), BCast->getBasePtr()};
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
MemVT, BCast->getMemOperand());
@@ -45763,7 +45839,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
bool LowUndef = true, AllUndefOrZero = true;
for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N0.getOperand(i);
- LowUndef &= Op.isUndef() || (i >= e/2);
+ LowUndef &= Op.isUndef() || (i >= e / 2);
AllUndefOrZero &= isNullConstantOrUndef(Op);
}
if (AllUndefOrZero) {
@@ -45805,8 +45881,8 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// Try to remove a bitcast of constant vXi1 vector. We have to legalize
// most of these to scalar anyway.
- if (Subtarget.hasAVX512() && VT.isScalarInteger() &&
- SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
+ if (Subtarget.hasAVX512() && VT.isScalarInteger() && SrcVT.isVector() &&
+ SrcVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
return combinevXi1ConstantToInteger(N0, DAG);
}
@@ -45824,8 +45900,8 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// Look for MOVMSK that is maybe truncated and then bitcasted to vXi1.
// Turn it into a sign bit compare that produces a k-register. This avoids
// a trip through a GPR.
- if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() &&
- VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+ if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1 &&
isPowerOf2_32(VT.getVectorNumElements())) {
unsigned NumElts = VT.getVectorNumElements();
SDValue Src = N0;
@@ -45879,12 +45955,12 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// transferring the SSE operand to integer register and back.
unsigned FPOpcode;
switch (N0.getOpcode()) {
- // clang-format off
+ // clang-format off
case ISD::AND: FPOpcode = X86ISD::FAND; break;
case ISD::OR: FPOpcode = X86ISD::FOR; break;
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
- // clang-format on
+ // clang-format on
}
// Check if we have a bitcast from another integer type as well.
@@ -46006,7 +46082,7 @@ static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS,
// Actually build the DotProduct, split as 256/512 bits for
// AVXVNNI/AVX512VNNI.
auto DpBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops) {
MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
return DAG.getNode(X86ISD::VPDPBUSD, DL, VT, Ops);
};
@@ -46043,7 +46119,7 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops);
};
MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
- return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, { SadOp0, SadOp1 },
+ return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, {SadOp0, SadOp1},
PSADBWBuilder);
}
@@ -46122,7 +46198,8 @@ static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
DAG.getVectorIdxConstant(0, DL));
}
-// Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK.
+// Attempt to replace an all_of/any_of/parity style horizontal reduction with a
+// MOVMSK.
static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Bail without SSE2.
@@ -46387,9 +46464,9 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
if (Stages > 3) {
unsigned SadElems = SadVT.getVectorNumElements();
- for(unsigned i = Stages - 3; i > 0; --i) {
+ for (unsigned i = Stages - 3; i > 0; --i) {
SmallVector<int, 16> Mask(SadElems, -1);
- for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
+ for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
Mask[j] = MaskEnd + j;
SDValue Shuffle =
@@ -46706,10 +46783,10 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
Vec.getOperand(0).getValueType().getScalarType(),
Vec.getOperand(0), Index);
- SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- Vec.getOperand(1), Index);
- SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- Vec.getOperand(2), Index);
+ SDValue Ext1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Vec.getOperand(1), Index);
+ SDValue Ext2 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Vec.getOperand(2), Index);
return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
}
@@ -46989,8 +47066,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return DAG.getConstant(EltBits[Idx].zext(NumEltBits), dl, VT);
}
- // Convert extract_element(bitcast(<X x i1>) -> bitcast(extract_subvector()).
- // Improves lowering of bool masks on rust which splits them into byte array.
+ // Convert extract_element(bitcast(<X x i1>) ->
+ // bitcast(extract_subvector()). Improves lowering of bool masks on rust
+ // which splits them into byte array.
if (InputVector.getOpcode() == ISD::BITCAST && (NumEltBits % 8) == 0) {
SDValue Src = peekThroughBitcasts(InputVector);
if (Src.getValueType().getScalarType() == MVT::i1 &&
@@ -47445,8 +47523,7 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
- if ((N->getOpcode() != ISD::VSELECT &&
- N->getOpcode() != X86ISD::BLENDV) ||
+ if ((N->getOpcode() != ISD::VSELECT && N->getOpcode() != X86ISD::BLENDV) ||
ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
@@ -47727,7 +47804,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Op0) && DAG.isEqualTo(RHS, Op1)) {
switch (CC) {
- default: break;
+ default:
+ break;
case ISD::SETULT:
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
@@ -47792,10 +47870,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
Opcode = X86ISD::FMAX;
break;
}
- // Check for x CC y ? y : x -- a min/max with reversed arms.
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Op1) && DAG.isEqualTo(RHS, Op0)) {
switch (CC) {
- default: break;
+ default:
+ break;
case ISD::SETOGE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
@@ -47999,13 +48078,13 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
Cond1 == InnerSetCC.getOperand(1)) {
ISD::CondCode NewCC;
switch (CC == ISD::SETEQ ? InnerCC : CC) {
- // clang-format off
+ // clang-format off
case ISD::SETGT: NewCC = ISD::SETGE; break;
case ISD::SETLT: NewCC = ISD::SETLE; break;
case ISD::SETUGT: NewCC = ISD::SETUGE; break;
case ISD::SETULT: NewCC = ISD::SETULE; break;
default: NewCC = ISD::SETCC_INVALID; break;
- // clang-format on
+ // clang-format on
}
if (NewCC != ISD::SETCC_INVALID) {
Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC);
@@ -48178,9 +48257,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// 16-bit lacks a proper blendv.
unsigned EltBitWidth = VT.getScalarSizeInBits();
bool CanShiftBlend =
- TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) ||
- (Subtarget.hasAVX2() && EltBitWidth == 64) ||
- (Subtarget.hasXOP()));
+ TLI.isTypeLegal(VT) &&
+ ((Subtarget.hasAVX() && EltBitWidth == 32) ||
+ (Subtarget.hasAVX2() && EltBitWidth == 64) || (Subtarget.hasXOP()));
if (CanShiftBlend &&
ISD::matchUnaryPredicate(And.getOperand(1), [](ConstantSDNode *C) {
return C->getAPIntValue().isPowerOf2();
@@ -48415,7 +48494,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
SDValue Op2 = Cmp.getOperand(1);
SDValue SetCC;
- const ConstantSDNode* C = nullptr;
+ const ConstantSDNode *C = nullptr;
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false; // Is it a comparison against 1?
@@ -48436,8 +48515,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
bool truncatedToBoolWithAnd = false;
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
- SetCC.getOpcode() == ISD::TRUNCATE ||
- SetCC.getOpcode() == ISD::AND) {
+ SetCC.getOpcode() == ISD::TRUNCATE || SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
if (isOneConstant(SetCC.getOperand(0)))
@@ -48480,13 +48558,13 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
if (!FVal) {
SDValue Op = SetCC.getOperand(0);
// Skip 'zext' or 'trunc' node.
- if (Op.getOpcode() == ISD::ZERO_EXTEND ||
- Op.getOpcode() == ISD::TRUNCATE)
+ if (Op.getOpcode() == ISD::ZERO_EXTEND || Op.getOpcode() == ISD::TRUNCATE)
Op = Op.getOperand(0);
// A special case for rdrand/rdseed, where 0 is set if false cond is
// found.
if ((Op.getOpcode() != X86ISD::RDRAND &&
- Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
+ Op.getOpcode() != X86ISD::RDSEED) ||
+ Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
@@ -48531,7 +48609,8 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
SDValue SetCC0, SetCC1;
switch (Cond->getOpcode()) {
- default: return false;
+ default:
+ return false;
case ISD::AND:
case X86ISD::AND:
isAnd = true;
@@ -48596,8 +48675,7 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
}
// If this is a check of the z flag of an add with 1, switch to the
// C flag.
- if (CarryCC == X86::COND_E &&
- CarryOp1.getOpcode() == X86ISD::ADD &&
+ if (CarryCC == X86::COND_E && CarryOp1.getOpcode() == X86ISD::ADD &&
isOneConstant(CarryOp1.getOperand(1)))
return CarryOp1;
} else if (FoundAndLSB) {
@@ -49108,12 +49186,11 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
// for any integer data type, including i8/i16.
- if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (FalseC->getAPIntValue() + 1 == TrueC->getAPIntValue()) {
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
- FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
@@ -49129,24 +49206,25 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
bool isFastMultiplier = false;
if (Diff.ult(10)) {
switch (Diff.getZExtValue()) {
- default: break;
- case 1: // result = add base, cond
- case 2: // result = lea base( , cond*2)
- case 3: // result = lea base(cond, cond*2)
- case 4: // result = lea base( , cond*4)
- case 5: // result = lea base(cond, cond*4)
- case 8: // result = lea base( , cond*8)
- case 9: // result = lea base(cond, cond*8)
+ default:
+ break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
- Cond = getSETCC(CC, Cond, DL ,DAG);
+ Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
- Cond);
+ Cond =
+ DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
@@ -49856,7 +49934,7 @@ static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG,
const SDLoc &DL,
const X86Subtarget &Subtarget) {
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
- "SRL or SRA node is required here!");
+ "SRL or SRA node is required here!");
if (!Subtarget.hasSSE2())
return SDValue();
@@ -49934,8 +50012,7 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zero's or all ones.
- if (VT.isInteger() && !VT.isVector() &&
- N1C && N0.getOpcode() == ISD::AND &&
+ if (VT.isInteger() && !VT.isVector() && N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
APInt Mask = N0.getConstantOperandAPInt(1);
@@ -50019,7 +50096,7 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
if (SraConst.isNegative())
return SDValue();
- for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
+ for (MVT SVT : {MVT::i8, MVT::i16, MVT::i32}) {
unsigned ShiftSize = SVT.getSizeInBits();
// Only deal with (Size - ShlConst) being equal to 8, 16 or 32.
if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
@@ -50353,8 +50430,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
// truncate to create a larger truncate.
- if (Subtarget.hasAVX512() &&
- N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
+ if (Subtarget.hasAVX512() && N0.getOpcode() == ISD::TRUNCATE &&
+ N1.isUndef() && VT == MVT::v16i8 &&
N0.getOperand(0).getValueType() == MVT::v8i32) {
if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
(!IsSigned &&
@@ -50701,7 +50778,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
SDValue CMP00 = CMP0->getOperand(0);
SDValue CMP01 = CMP0->getOperand(1);
- EVT VT = CMP00.getValueType();
+ EVT VT = CMP00.getValueType();
if (VT == MVT::f32 || VT == MVT::f64 ||
(VT == MVT::f16 && Subtarget.hasFP16())) {
@@ -50727,8 +50804,10 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
}
if (!ExpectingFlags) {
- enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
- enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
+ enum X86::CondCode cc0 =
+ (enum X86::CondCode)N0.getConstantOperandVal(0);
+ enum X86::CondCode cc1 =
+ (enum X86::CondCode)N1.getConstantOperandVal(0);
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
X86::CondCode tmp = cc0;
@@ -50736,7 +50815,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
cc1 = tmp;
}
- if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
+ if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
@@ -50746,7 +50825,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
DAG.getTargetConstant(x86cc, DL, MVT::i8));
// Need to fill with zeros to ensure the bitcast will produce zeroes
- // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
+ // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee
+ // that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
DAG.getConstant(0, DL, MVT::v16i1),
FSetCC, DAG.getVectorIdxConstant(0, DL));
@@ -50778,8 +50858,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, DL, IntVT));
- SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
- ANDed);
+ SDValue OneBitOfTruth =
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
return OneBitOfTruth;
}
}
@@ -50967,7 +51047,8 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
assert(VT.isVector() && "Expected vector type");
assert((N.getOpcode() == ISD::ANY_EXTEND ||
N.getOpcode() == ISD::ZERO_EXTEND ||
- N.getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
+ N.getOpcode() == ISD::SIGN_EXTEND) &&
+ "Invalid Node");
SDValue Narrow = N.getOperand(0);
EVT NarrowVT = Narrow.getValueType();
@@ -50977,26 +51058,27 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
if (!Op)
return SDValue();
switch (N.getOpcode()) {
- default: llvm_unreachable("Unexpected opcode");
+ default:
+ llvm_unreachable("Unexpected opcode");
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND:
return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
case ISD::SIGN_EXTEND:
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
- Op, DAG.getValueType(NarrowVT));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
+ DAG.getValueType(NarrowVT));
}
}
static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) {
unsigned FPOpcode;
switch (Opcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected input node for FP logic conversion");
case ISD::AND: FPOpcode = X86ISD::FAND; break;
case ISD::OR: FPOpcode = X86ISD::FOR; break;
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
- // clang-format on
+ // clang-format on
}
return FPOpcode;
}
@@ -51442,8 +51524,7 @@ static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 4> Ops(Src.getNumOperands(),
DAG.getConstant(0, dl, SubVecVT));
Ops[0] = SubVec;
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT,
- Ops);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, Ops);
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getSizeInBits());
return DAG.getZExtOrTrunc(DAG.getBitcast(IntVT, Concat), dl, VT);
}
@@ -52615,7 +52696,8 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (NotCond) {
SDValue R = DAG.getZExtOrTrunc(NotCond, dl, VT);
- R = DAG.getNode(ISD::MUL, dl, VT, R, DAG.getConstant(Val + 1, dl, VT));
+ R = DAG.getNode(ISD::MUL, dl, VT, R,
+ DAG.getConstant(Val + 1, dl, VT));
R = DAG.getNode(ISD::SUB, dl, VT, R, DAG.getConstant(1, dl, VT));
return R;
}
@@ -52757,7 +52839,7 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return SDValue();
switch (VT.getSimpleVT().SimpleTy) {
- // clang-format off
+ // clang-format off
default: return SDValue();
case MVT::v16i8:
case MVT::v8i16:
@@ -52887,8 +52969,8 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
// split across two registers. We can use a packusdw+perm to clamp to 0-65535
// and concatenate at the same time. Then we can use a final vpmovuswb to
// clip to 0-255.
- if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- InVT == MVT::v16i32 && VT == MVT::v16i8) {
+ if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && InVT == MVT::v16i32 &&
+ VT == MVT::v16i8) {
if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
// Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB.
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
@@ -52904,11 +52986,12 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
// FIXME: We could widen truncates to 512 to remove the VLX restriction.
// If the result type is 256-bits or larger and we have disable 512-bit
// registers, we should go ahead and use the pack instructions if possible.
- bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) ||
- (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
- (InVT.getSizeInBits() > 128) &&
- (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
- !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
+ bool PreferAVX512 =
+ ((Subtarget.hasAVX512() && InSVT == MVT::i32) ||
+ (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
+ (InVT.getSizeInBits() > 128) &&
+ (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
+ !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
if (!PreferAVX512 && VT.getVectorNumElements() > 1 &&
isPowerOf2_32(VT.getVectorNumElements()) &&
@@ -52921,8 +53004,8 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
DAG, Subtarget);
assert(Mid && "Failed to pack!");
- SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
- Subtarget);
+ SDValue V =
+ truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, Subtarget);
assert(V && "Failed to pack!");
return V;
} else if (SVT == MVT::i8 || Subtarget.hasSSE41())
@@ -53244,10 +53327,9 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
CastVT = VT.changeVectorElementType(EltVT);
}
- SDValue Load =
- DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
- ML->getPointerInfo().getWithOffset(Offset),
- Alignment, ML->getMemOperand()->getFlags());
+ SDValue Load = DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
+ ML->getPointerInfo().getWithOffset(Offset),
+ Alignment, ML->getMemOperand()->getFlags());
SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());
@@ -53278,8 +53360,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
if (LoadFirstElt && LoadLastElt) {
SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMemOperand());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
- ML->getPassThru());
+ SDValue Blend =
+ DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getPassThru());
return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
}
@@ -53301,8 +53383,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(),
DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(),
ML->getAddressingMode(), ML->getExtensionType());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
- ML->getPassThru());
+ SDValue Blend =
+ DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getPassThru());
return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
@@ -53382,8 +53464,8 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
// Store that element at the appropriate offset from the base pointer.
return DAG.getStore(MS->getChain(), DL, Extract, Addr,
- MS->getPointerInfo().getWithOffset(Offset),
- Alignment, MS->getMemOperand()->getFlags());
+ MS->getPointerInfo().getWithOffset(Offset), Alignment,
+ MS->getMemOperand()->getFlags());
}
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
@@ -53483,15 +53565,16 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// Turn vXi1 stores of constants into a scalar store.
if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
- VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&
+ VT == MVT::v64i1) &&
+ VT == StVT && TLI.isTypeLegal(VT) &&
ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) {
// If its a v64i1 store without 64-bit support, we need two stores.
if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {
- SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
- StoredVal->ops().slice(0, 32));
+ SDValue Lo =
+ DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(0, 32));
Lo = combinevXi1ConstantToInteger(Lo, DAG);
- SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl,
- StoredVal->ops().slice(32, 32));
+ SDValue Hi =
+ DAG.getBuildVector(MVT::v32i1, dl, StoredVal->ops().slice(32, 32));
Hi = combinevXi1ConstantToInteger(Hi, DAG);
SDValue Ptr0 = St->getBasePtr();
@@ -53591,9 +53674,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
StoredVal.hasOneUse() &&
TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
- return EmitTruncSStore(IsSigned, St->getChain(),
- dl, StoredVal.getOperand(0), St->getBasePtr(),
- VT, St->getMemOperand(), DAG);
+ return EmitTruncSStore(IsSigned, St->getChain(), dl,
+ StoredVal.getOperand(0), St->getBasePtr(), VT,
+ St->getMemOperand(), DAG);
}
// Try to fold a extract_element(VTRUNC) pattern into a truncating store.
@@ -53632,14 +53715,14 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
if (St->isTruncatingStore() && VT.isVector()) {
if (TLI.isTruncStoreLegal(VT, StVT)) {
if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT()))
- return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
- dl, Val, St->getBasePtr(),
- St->getMemoryVT(), St->getMemOperand(), DAG);
- if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(),
- DAG, dl))
+ return EmitTruncSStore(true /* Signed saturation */, St->getChain(), dl,
+ Val, St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand(), DAG);
+ if (SDValue Val =
+ detectUSatPattern(St->getValue(), St->getMemoryVT(), DAG, dl))
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
- dl, Val, St->getBasePtr(),
- St->getMemoryVT(), St->getMemOperand(), DAG);
+ dl, Val, St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand(), DAG);
}
return SDValue();
@@ -54228,23 +54311,14 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
// cases.
static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
const SDLoc &DL) {
- assert(N.getOpcode() == ISD::SRL && "Unknown shift opcode");
- std::optional<uint64_t> ValidSrlConst = DAG.getValidShiftAmount(N);
- if (!ValidSrlConst)
- return SDValue();
- uint64_t SrlConstVal = *ValidSrlConst;
SDValue Op = N.getOperand(0);
+ APInt OpConst = Op.getConstantOperandAPInt(1);
+ APInt SrlConst = N.getConstantOperandAPInt(1);
+ uint64_t SrlConstVal = SrlConst.getZExtValue();
unsigned Opcode = Op.getOpcode();
- assert(VT == MVT::i32 && Op.getValueType() == MVT::i64 &&
- "Illegal truncation types");
- if ((Opcode != ISD::ADD && Opcode != ISD::OR && Opcode != ISD::XOR) ||
- !isa<ConstantSDNode>(Op.getOperand(1)))
- return SDValue();
- const APInt &OpConst = Op.getConstantOperandAPInt(1);
-
- if (SrlConstVal <= 32 ||
+ if (SrlConst.ule(32) ||
(Opcode == ISD::ADD && OpConst.countr_zero() < SrlConstVal))
return SDValue();
@@ -54252,14 +54326,13 @@ static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl);
- APInt NewOpConstVal = OpConst.lshr(SrlConstVal).trunc(VT.getSizeInBits());
+ APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits());
SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT);
SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst);
+ EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal);
- if (Opcode == ISD::ADD) {
- EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal);
+ if (Opcode == ISD::ADD)
return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT);
- }
return NewOpNode;
}
@@ -54308,8 +54381,20 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
if (!Src.hasOneUse())
return SDValue();
- if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL)
- return combinei64TruncSrlConstant(Src, VT, DAG, DL);
+ if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL &&
+ isa<ConstantSDNode>(Src.getOperand(1))) {
+
+ unsigned SrcOpOpcode = Src.getOperand(0).getOpcode();
+ if ((SrcOpOpcode != ISD::ADD && SrcOpOpcode != ISD::OR &&
+ SrcOpOpcode != ISD::XOR) ||
+ !isa<ConstantSDNode>(Src.getOperand(0).getOperand(1)))
+ return SDValue();
+
+ if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL))
+ return R;
+
+ return SDValue();
+ }
if (!VT.isVector())
return SDValue();
@@ -54430,8 +54515,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))),
// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B))))))))
static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- const SDLoc &DL) {
+ const X86Subtarget &Subtarget, const SDLoc &DL) {
if (!VT.isVector() || !Subtarget.hasSSSE3())
return SDValue();
@@ -54527,8 +54611,8 @@ static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
std::swap(IdxN01, IdxN11);
}
// N0 indices be the even element. N1 indices must be the next odd element.
- if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
- IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
+ if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i ||
+ IdxN11 != 2 * i + 1)
return SDValue();
SDValue N00In = N00Elt.getOperand(0);
SDValue N01In = N01Elt.getOperand(0);
@@ -54539,8 +54623,8 @@ static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
ZExtIn = N00In;
SExtIn = N01In;
}
- if (ZExtIn != N00In || SExtIn != N01In ||
- ZExtIn != N10In || SExtIn != N11In)
+ if (ZExtIn != N00In || SExtIn != N01In || ZExtIn != N10In ||
+ SExtIn != N11In)
return SDValue();
}
@@ -54560,14 +54644,13 @@ static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
// Shrink by adding truncate nodes and let DAGCombine fold with the
// sources.
EVT InVT = Ops[0].getValueType();
- assert(InVT.getScalarType() == MVT::i8 &&
- "Unexpected scalar element type");
+ assert(InVT.getScalarType() == MVT::i8 && "Unexpected scalar element type");
assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
InVT.getVectorNumElements() / 2);
return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
};
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, {ZExtIn, SExtIn},
PMADDBuilder);
}
@@ -54715,7 +54798,7 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
bool NegRes) {
if (NegMul) {
switch (Opcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected opcode");
case ISD::FMA: Opcode = X86ISD::FNMADD; break;
case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FNMADD; break;
@@ -54729,13 +54812,13 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FMSUB; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
- // clang-format on
+ // clang-format on
}
}
if (NegAcc) {
switch (Opcode) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected opcode");
case ISD::FMA: Opcode = X86ISD::FMSUB; break;
case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FMSUB; break;
@@ -54753,7 +54836,7 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break;
case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break;
case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break;
- // clang-format on
+ // clang-format on
}
}
@@ -54770,7 +54853,7 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
case X86ISD::FNMSUB: Opcode = ISD::FMA; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
- // clang-format on
+ // clang-format on
}
}
@@ -54906,19 +54989,18 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
unsigned IntOpcode;
switch (N->getOpcode()) {
- // clang-format off
+ // clang-format off
default: llvm_unreachable("Unexpected FP logic op");
case X86ISD::FOR: IntOpcode = ISD::OR; break;
case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
case X86ISD::FAND: IntOpcode = ISD::AND; break;
case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
- // clang-format on
+ // clang-format on
}
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
return DAG.getBitcast(VT, IntOp);
}
-
/// Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val)
static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() != ISD::XOR)
@@ -55266,13 +55348,18 @@ static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
// into FMINC and FMAXC, which are Commutative operations.
unsigned NewOp = 0;
switch (N->getOpcode()) {
- default: llvm_unreachable("unknown opcode");
- case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
- case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ default:
+ llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN:
+ NewOp = X86ISD::FMINC;
+ break;
+ case X86ISD::FMAX:
+ NewOp = X86ISD::FMAXC;
+ break;
}
- return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1));
+ return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), N->getOperand(0),
+ N->getOperand(1));
}
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
@@ -55311,8 +55398,8 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
- EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- VT);
+ EVT SetCCType =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// There are 4 possibilities involving NaN inputs, and these are the required
// outputs:
@@ -55362,8 +55449,8 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
SDLoc dl(N);
- SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
- DAG.getBitcast(InVT, VZLoad));
+ SDValue Convert =
+ DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
DCI.CombineTo(N, Convert);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
@@ -55856,8 +55943,8 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
// Only combine legal element types.
EVT SVT = VT.getVectorElementType();
- if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
- SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
+ if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && SVT != MVT::i64 &&
+ SVT != MVT::f32 && SVT != MVT::f64)
return SDValue();
// We don't have CMPP Instruction for vxf16
@@ -55897,16 +55984,15 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
// (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
- if (!DCI.isBeforeLegalizeOps() &&
- N0.getOpcode() == X86ISD::SETCC_CARRY) {
+ if (!DCI.isBeforeLegalizeOps() && N0.getOpcode() == X86ISD::SETCC_CARRY) {
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
- N0->getOperand(1));
+ N0->getOperand(1));
bool ReplaceOtherUses = !N0.hasOneUse();
DCI.CombineTo(N, Setcc);
// Replace other uses with a truncate of the widened setcc_carry.
if (ReplaceOtherUses) {
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), Setcc);
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), Setcc);
DCI.CombineTo(N0.getNode(), Trunc);
}
@@ -56199,13 +56285,13 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
N0.getOpcode() == X86ISD::SETCC_CARRY) {
SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
- N0->getOperand(1));
+ N0->getOperand(1));
bool ReplaceOtherUses = !N0.hasOneUse();
DCI.CombineTo(N, Setcc);
// Replace other uses with a truncate of the widened setcc_carry.
if (ReplaceOtherUses) {
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), Setcc);
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), Setcc);
DCI.CombineTo(N0.getNode(), Trunc);
}
@@ -56441,8 +56527,8 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (CInt.isPowerOf2() && !CInt.isMinSignedValue()) {
SDValue BaseOp = LHS.getOperand(0);
SDValue SETCC0 = DAG.getSetCC(DL, VT, BaseOp, RHS, CC);
- SDValue SETCC1 = DAG.getSetCC(
- DL, VT, BaseOp, DAG.getConstant(-CInt, DL, OpVT), CC);
+ SDValue SETCC1 = DAG.getSetCC(DL, VT, BaseOp,
+ DAG.getConstant(-CInt, DL, OpVT), CC);
return DAG.getNode(CC == ISD::SETEQ ? ISD::OR : ISD::AND, DL, VT,
SETCC0, SETCC1);
}
@@ -56802,19 +56888,25 @@ static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS,
SDLoc DL(GorS);
if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
- SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(),
- Gather->getMask(), Base, Index, Scale } ;
- return DAG.getMaskedGather(Gather->getVTList(),
- Gather->getMemoryVT(), DL, Ops,
- Gather->getMemOperand(),
+ SDValue Ops[] = {Gather->getChain(),
+ Gather->getPassThru(),
+ Gather->getMask(),
+ Base,
+ Index,
+ Scale};
+ return DAG.getMaskedGather(Gather->getVTList(), Gather->getMemoryVT(), DL,
+ Ops, Gather->getMemOperand(),
Gather->getIndexType(),
Gather->getExtensionType());
}
auto *Scatter = cast<MaskedScatterSDNode>(GorS);
- SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),
- Scatter->getMask(), Base, Index, Scale };
- return DAG.getMaskedScatter(Scatter->getVTList(),
- Scatter->getMemoryVT(), DL,
+ SDValue Ops[] = {Scatter->getChain(),
+ Scatter->getValue(),
+ Scatter->getMask(),
+ Base,
+ Index,
+ Scale};
+ return DAG.getMaskedScatter(Scatter->getVTList(), Scatter->getMemoryVT(), DL,
Ops, Scatter->getMemOperand(),
Scatter->getIndexType(),
Scatter->isTruncatingStore());
@@ -57045,8 +57137,8 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0),
- MaskConst);
+ SDValue NewAnd =
+ DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0), MaskConst);
SDValue Res = DAG.getBitcast(VT, NewAnd);
if (IsStrict)
return DAG.getMergeValues({Res, SourceConst.getValue(1)}, DL);
@@ -57232,8 +57324,8 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
// use CVTSI2P.
assert(InVT == MVT::v2i64 && "Unexpected VT!");
SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
- SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
- { 0, 2, -1, -1 });
+ SDValue Shuf =
+ DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast, {0, 2, -1, -1});
if (IsStrict)
return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
{N->getOperand(0), Shuf});
@@ -57334,7 +57426,7 @@ static bool needCarryOrOverflowFlag(SDValue Flags) {
}
switch (CC) {
- // clang-format off
+ // clang-format off
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
@@ -57342,7 +57434,7 @@ static bool needCarryOrOverflowFlag(SDValue Flags) {
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
return true;
- // clang-format on
+ // clang-format on
}
}
@@ -57478,11 +57570,12 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
// After this the truncate and arithmetic op must have a single use.
if (!Trunc.hasOneUse() || !Op.hasOneUse())
- return SDValue();
+ return SDValue();
unsigned NewOpc;
switch (Op.getOpcode()) {
- default: return SDValue();
+ default:
+ return SDValue();
case ISD::AND:
// Skip and with constant. We have special handling for and with immediate
// during isel to generate test instructions.
@@ -57490,8 +57583,12 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
return SDValue();
NewOpc = X86ISD::AND;
break;
- case ISD::OR: NewOpc = X86ISD::OR; break;
- case ISD::XOR: NewOpc = X86ISD::XOR; break;
+ case ISD::OR:
+ NewOpc = X86ISD::OR;
+ break;
+ case ISD::XOR:
+ NewOpc = X86ISD::XOR;
+ break;
case ISD::ADD:
// If the carry or overflow flag is used, we can't truncate.
if (needCarryOrOverflowFlag(SDValue(N, 0)))
@@ -57651,9 +57748,8 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N,
- const SDLoc &DL, EVT VT,
- const X86Subtarget &Subtarget) {
+static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N, const SDLoc &DL,
+ EVT VT, const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
// Example of pattern we try to detect:
@@ -57761,9 +57857,8 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N,
// Attempt to turn this pattern into PMADDWD.
// (add (mul (sext (build_vector)), (sext (build_vector))),
// (mul (sext (build_vector)), (sext (build_vector)))
-static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N,
- const SDLoc &DL, EVT VT,
- const X86Subtarget &Subtarget) {
+static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N, const SDLoc &DL,
+ EVT VT, const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
if (!Subtarget.hasSSE2())
@@ -57859,7 +57954,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N,
// If the output is narrower than an input, extract the low part of the input
// vector.
EVT OutVT16 = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
- VT.getVectorNumElements() * 2);
+ VT.getVectorNumElements() * 2);
if (OutVT16.bitsLT(In0.getValueType())) {
In0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In0,
DAG.getVectorIdxConstant(0, DL));
@@ -57868,8 +57963,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDNode *N,
In1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In1,
DAG.getVectorIdxConstant(0, DL));
}
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },
- PMADDBuilder);
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, {In0, In1}, PMADDBuilder);
}
// ADD(VPMADDWD(X,Y),VPMADDWD(Z,W)) -> VPMADDWD(SHUFFLE(X,Z), SHUFFLE(Y,W))
@@ -58750,8 +58844,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
unsigned Imm1 = Ops[1].getConstantOperandVal(2);
// TODO: Handle zero'd subvectors.
if ((Imm0 & 0x88) == 0 && (Imm1 & 0x88) == 0) {
- int Mask[4] = {(int)(Imm0 & 0x03), (int)((Imm0 >> 4) & 0x3), (int)(Imm1 & 0x03),
- (int)((Imm1 >> 4) & 0x3)};
+ int Mask[4] = {(int)(Imm0 & 0x03), (int)((Imm0 >> 4) & 0x3),
+ (int)(Imm1 & 0x03), (int)((Imm1 >> 4) & 0x3)};
MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64;
SDValue LHS = concatSubVectors(Ops[0].getOperand(0),
Ops[0].getOperand(1), DAG, DL);
@@ -58937,8 +59031,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
break;
}
- ISD::CondCode ICC =
- Opcode == X86ISD::PCMPEQ ? ISD::SETEQ : ISD::SETGT;
+ ISD::CondCode ICC = Opcode == X86ISD::PCMPEQ ? ISD::SETEQ : ISD::SETGT;
ISD::CondCode FCC =
Opcode == X86ISD::PCMPEQ ? ISD::SETOEQ : ISD::SETOGT;
@@ -59240,7 +59333,8 @@ static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
APInt Constant = APInt::getZero(VT.getSizeInBits());
for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
auto *C = dyn_cast<ConstantSDNode>(peekThroughBitcasts(Ops[I]));
- if (!C) break;
+ if (!C)
+ break;
Constant.insertBits(C->getAPIntValue(), I * SubSizeInBits);
if (I == (E - 1)) {
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
@@ -59313,9 +59407,9 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
Ins.getOperand(1).getValueSizeInBits().getFixedValue() <=
SubVecVT.getFixedSizeInBits())
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
- getZeroVector(OpVT, Subtarget, DAG, dl),
- Ins.getOperand(1), N->getOperand(2));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
+ getZeroVector(OpVT, Subtarget, DAG, dl),
+ Ins.getOperand(1), N->getOperand(2));
}
}
@@ -59549,8 +59643,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(N);
- if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
- TLI.isTypeLegal(InVecVT) &&
+ if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && TLI.isTypeLegal(InVecVT) &&
InSizeInBits == 256 && InVecBC.getOpcode() == ISD::AND) {
auto isConcatenatedNot = [](SDValue V) {
V = peekThroughBitcasts(V);
@@ -60007,7 +60100,7 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
LHS.getOperand(0).getValueType() == MVT::v4i32) {
SDLoc dl(N);
LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0),
- LHS.getOperand(0), { 0, -1, 1, -1 });
+ LHS.getOperand(0), {0, -1, 1, -1});
LHS = DAG.getBitcast(MVT::v2i64, LHS);
return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
}
@@ -60017,7 +60110,7 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
RHS.getOperand(0).getValueType() == MVT::v4i32) {
SDLoc dl(N);
RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0),
- RHS.getOperand(0), { 0, -1, 1, -1 });
+ RHS.getOperand(0), {0, -1, 1, -1});
RHS = DAG.getBitcast(MVT::v2i64, RHS);
return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
}
@@ -60253,16 +60346,16 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
// Widen to at least 8 input elements.
if (NumElts < 8) {
unsigned NumConcats = 8 / NumElts;
- SDValue Fill = NumElts == 4 ? DAG.getUNDEF(IntVT)
- : DAG.getConstant(0, dl, IntVT);
+ SDValue Fill =
+ NumElts == 4 ? DAG.getUNDEF(IntVT) : DAG.getConstant(0, dl, IntVT);
SmallVector<SDValue, 4> Ops(NumConcats, Fill);
Ops[0] = Src;
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, Ops);
}
// Destination is vXf32 with at least 4 elements.
- EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32,
- std::max(4U, NumElts));
+ EVT CvtVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::f32, std::max(4U, NumElts));
SDValue Cvt, Chain;
if (IsStrict) {
Cvt = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {CvtVT, MVT::Other},
@@ -60532,7 +60625,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
- // clang-format off
+ // clang-format off
default: break;
case ISD::SCALAR_TO_VECTOR:
return combineSCALAR_TO_VECTOR(N, DAG, Subtarget);
@@ -60881,7 +60974,8 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
bool Commute = false;
switch (Op.getOpcode()) {
- default: return false;
+ default:
+ return false;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
@@ -60921,8 +61015,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
- if (IsFoldableAtomicRMW(N0, Op) ||
- (Commute && IsFoldableAtomicRMW(N1, Op)))
+ if (IsFoldableAtomicRMW(N0, Op) || (Commute && IsFoldableAtomicRMW(N1, Op)))
return false;
}
}
@@ -60984,7 +61077,8 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
- default: return false;
+ default:
+ return false;
case 1:
// FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical
@@ -61031,9 +61125,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
if (CI->getType()->isIntegerTy(64)) {
InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
- if (Constraints.size() >= 2 &&
- Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
- Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ if (Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 &&
+ Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 &&
+ Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
@@ -61120,8 +61214,7 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
}
- }
- else if (Constraint.size() == 2) {
+ } else if (Constraint.size() == 2) {
switch (Constraint[0]) {
default:
break;
@@ -61310,8 +61403,7 @@ X86TargetLowering::getSingleConstraintMatchWeight(
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
-const char *X86TargetLowering::
-LowerXConstraint(EVT ConstraintVT) const {
+const char *X86TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
@@ -61357,7 +61449,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
SDValue Result;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
- default: break;
+ default:
+ break;
case 'I':
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
@@ -61431,8 +61524,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
- // FIXME gcc accepts some relocatable values here too, but only in certain
- // memory models; it's complicated.
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
}
return;
}
@@ -61475,8 +61568,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (auto *CST = dyn_cast<ConstantSDNode>(Op)) {
bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
BooleanContent BCont = getBooleanContents(MVT::i64);
- ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
- : ISD::SIGN_EXTEND;
+ ISD::NodeType ExtOpc =
+ IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()
: CST->getSExtValue();
Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64);
@@ -61555,7 +61648,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
- default: break;
+ default:
+ break;
// 'A' means [ER]AX + [ER]DX.
case 'A':
if (Subtarget.is64Bit())
@@ -61583,7 +61677,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::VK64RegClass);
}
break;
- case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
@@ -61605,7 +61699,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
[[fallthrough]];
// 32-bit fallthrough
- case 'Q': // Q_REGS
+ case 'Q': // Q_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
if (VT == MVT::i16)
@@ -61616,8 +61710,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
- case 'r': // GENERAL_REGS
- case 'l': // INDEX_REGS
+ case 'r': // GENERAL_REGS
+ case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR8RegClass
@@ -61636,7 +61730,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
? &X86::GR64RegClass
: &X86::GR64_NOREX2RegClass);
break;
- case 'R': // LEGACY_REGS
+ case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
@@ -61647,7 +61741,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_NOREXRegClass);
break;
- case 'f': // FP Stack registers.
+ case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
@@ -61657,16 +61751,19 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80)
return std::make_pair(0U, &X86::RFP80RegClass);
break;
- case 'y': // MMX_REGS if MMX allowed.
- if (!Subtarget.hasMMX()) break;
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget.hasMMX())
+ break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'v':
- case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
- if (!Subtarget.hasSSE1()) break;
+ case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
+ if (!Subtarget.hasSSE1())
+ break;
bool VConstraint = (Constraint[0] == 'v');
switch (VT.SimpleTy) {
- default: break;
+ default:
+ break;
// Scalar SSE types.
case MVT::f16:
if (VConstraint && Subtarget.hasFP16())
@@ -61754,7 +61851,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
- if (!Subtarget.hasAVX512()) break;
+ if (!Subtarget.hasAVX512())
+ break;
if (VConstraint)
return std::make_pair(0U, &X86::VR512RegClass);
return std::make_pair(0U, &X86::VR512_0_15RegClass);
@@ -61770,12 +61868,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case '2':
return getRegForInlineAsmConstraint(TRI, "x", VT);
case 'm':
- if (!Subtarget.hasMMX()) break;
+ if (!Subtarget.hasMMX())
+ break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'z':
- if (!Subtarget.hasSSE1()) break;
+ if (!Subtarget.hasSSE1())
+ break;
switch (VT.SimpleTy) {
- default: break;
+ default:
+ break;
// Scalar SSE types.
case MVT::f16:
if (!Subtarget.hasFP16())
@@ -61890,14 +61991,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
- std::pair<Register, const TargetRegisterClass*> Res;
+ std::pair<Register, const TargetRegisterClass *> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
// Only match x87 registers if the VT is one SelectionDAGBuilder can convert
// to/from f80.
- if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
+ if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 ||
+ VT == MVT::f80) {
// Map st(0) -> st(7) -> ST0
if (Constraint.size() == 7 && Constraint[0] == '{' &&
tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
@@ -61955,7 +62057,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// turn into {ax},{dx}.
// MVT::Other is used to specify clobber names.
if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
- return Res; // Correct type already, nothing to do.
+ return Res; // Correct type already, nothing to do.
// Get a matching integer of the correct size. i.e. "ax" with MVT::32 should
// return "eax". This should even work for things like getting 64bit integer
@@ -61967,7 +62069,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Therefore, use a helper method.
if (isGRClass(*Class)) {
unsigned Size = VT.getSizeInBits();
- if (Size == 1) Size = 8;
+ if (Size == 1)
+ Size = 8;
if (Size != 8 && Size != 16 && Size != 32 && Size != 64)
return std::make_pair(0, nullptr);
Register DestReg = getX86SubSuperRegister(Res.first, Size);
@@ -61975,9 +62078,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
bool is64Bit = Subtarget.is64Bit();
const TargetRegisterClass *RC =
Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
- : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
- : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
- : /*Size == 64*/ (is64Bit ? &X86::GR64RegClass : nullptr);
+ : Size == 16
+ ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
+ : Size == 32
+ ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
+ : /*Size == 64*/ (is64Bit ? &X86::GR64RegClass : nullptr);
if (Size == 64 && !is64Bit) {
// Model GCC's behavior here and select a fixed pair of 32-bit
// registers.
@@ -62229,8 +62334,7 @@ X86TargetLowering::getStackProbeSymbolName(const MachineFunction &MF) const {
return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
}
-unsigned
-X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
+unsigned X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
// The default stack probe size is 4096 if the function has no stackprobesize
// attribute.
return MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size",
diff --git a/llvm/test/CodeGen/X86/ctlz-gfni.ll b/llvm/test/CodeGen/X86/ctlz-gfni.ll
new file mode 100644
index 0000000000000..d942f5ad3506f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ctlz-gfni.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=x86_64 -mattr=+gfni,+ssse3 < %s | FileCheck %s
+
+define <16 x i8> @test_ctlz_gfni(<16 x i8> %x) {
+; CHECK-LABEL: @test_ctlz_gfni
+; CHECK: gf2p8affineqb
+; CHECK: paddb
+; CHECK: pandn
+; CHECK: gf2p8affineqb
+; CHECK: ret
+ %r = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x, i1 false)
+ ret <16 x i8> %r
+}
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
+
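A note on the new codegen for readers skimming the CHECK lines: the updated per-byte sequence in each 128/256/512-bit chunk is gf2p8affineqb $0, paddb with an all-ones vector, pandn, then gf2p8affineqb $8. One way to read that shape is "apply an affine transform to each byte, isolate the lowest set bit (v & ~(v - 1)), then use a second affine transform plus the XOR-8 immediate to turn that power of two into the leading-zero count". The scalar sketch below is only a reference model of that per-byte idea, built from the classic ctlz(x) == cttz(bitreverse(x)) identity; bitrev8() and the final lookup loop are illustrative stand-ins for the two affine steps, not the exact matrices and immediates chosen by the patch.

// ctlz_model.cpp - scalar reference for per-byte leading-zero counts.
// bitrev8() and the lookup loop model the math behind the two
// gf2p8affineqb steps; they do not reproduce the patch's constants.
#include <cstdint>
#include <cstdio>

static uint8_t bitrev8(uint8_t X) {
  uint8_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= ((X >> I) & 1u) << (7 - I);
  return R;
}

static uint8_t ctlz8(uint8_t X) {
  uint8_t Rev = bitrev8(X);                // ctlz(X) == cttz(bitrev(X))
  uint8_t Lsb = Rev & (uint8_t)~(Rev - 1); // paddb(-1) + pandn: lowest set bit
  for (uint8_t K = 0; K < 8; ++K)          // map (1 << K) -> K, and 0 -> 8
    if (Lsb == (uint8_t)(1u << K))
      return K;
  return 8;
}

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    unsigned Expected = 8;
    for (int B = 7; B >= 0; --B) {
      if (V & (1u << B)) {
        Expected = 7 - B;
        break;
      }
    }
    if (ctlz8((uint8_t)V) != Expected) {
      std::printf("mismatch at %u\n", V);
      return 1;
    }
  }
  std::printf("ctlz8 model matches for all 256 byte values\n");
  return 0;
}

Compiling this standalone and running it checks all 256 byte values against the llvm.ctlz semantics exercised by these tests (ctlz(0) == 8 when the is-zero-poison flag is false).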
diff --git a/llvm/test/CodeGen/X86/gfni-lzcnt.ll b/llvm/test/CodeGen/X86/gfni-lzcnt.ll
index 8e48950c32cd8..f4dd1d1b77ea9 100644
--- a/llvm/test/CodeGen/X86/gfni-lzcnt.ll
+++ b/llvm/test/CodeGen/X86/gfni-lzcnt.ll
@@ -8,40 +8,44 @@
define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; GFNISSE-LABEL: testv16i8:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNISSE-NEXT: movdqa %xmm1, %xmm2
-; GFNISSE-NEXT: pshufb %xmm0, %xmm2
-; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; GFNISSE-NEXT: pxor %xmm3, %xmm3
-; GFNISSE-NEXT: pcmpeqb %xmm0, %xmm3
-; GFNISSE-NEXT: pand %xmm2, %xmm3
-; GFNISSE-NEXT: pshufb %xmm0, %xmm1
-; GFNISSE-NEXT: paddb %xmm3, %xmm1
+; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm2, %xmm0
+; GFNISSE-NEXT: pcmpeqd %xmm1, %xmm1
+; GFNISSE-NEXT: paddb %xmm0, %xmm1
+; GFNISSE-NEXT: pandn %xmm0, %xmm1
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm2, %xmm1
; GFNISSE-NEXT: movdqa %xmm1, %xmm0
; GFNISSE-NEXT: retq
;
-; GFNIAVX1OR2-LABEL: testv16i8:
-; GFNIAVX1OR2: # %bb.0:
-; GFNIAVX1OR2-NEXT: vmovq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; GFNIAVX1OR2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX1OR2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; GFNIAVX1OR2-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
-; GFNIAVX1OR2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; GFNIAVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm0
-; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm2, %xmm0
-; GFNIAVX1OR2-NEXT: retq
+; GFNIAVX1-LABEL: testv16i8:
+; GFNIAVX1: # %bb.0:
+; GFNIAVX1-NEXT: vmovddup {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX1-NEXT: # xmm1 = mem[0,0]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm2
+; GFNIAVX1-NEXT: vpandn %xmm0, %xmm2, %xmm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $8, %xmm1, %xmm0, %xmm0
+; GFNIAVX1-NEXT: retq
+;
+; GFNIAVX2-LABEL: testv16i8:
+; GFNIAVX2: # %bb.0:
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; GFNIAVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm2
+; GFNIAVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
+; GFNIAVX2-NEXT: vgf2p8affineqb $8, %xmm1, %xmm0, %xmm0
+; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: testv16i8:
; GFNIAVX512: # %bb.0:
-; GFNIAVX512-NEXT: vmovq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX512-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNIAVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; GFNIAVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
-; GFNIAVX512-NEXT: vpand %xmm3, %xmm2, %xmm2
-; GFNIAVX512-NEXT: vpshufb %xmm0, %xmm1, %xmm0
-; GFNIAVX512-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; GFNIAVX512-NEXT: vpaddb %xmm2, %xmm0, %xmm2
+; GFNIAVX512-NEXT: vpandn %xmm0, %xmm2, %xmm0
+; GFNIAVX512-NEXT: vgf2p8affineqb $8, %xmm1, %xmm0, %xmm0
; GFNIAVX512-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %in, i1 0)
ret <16 x i8> %out
@@ -50,40 +54,44 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; GFNISSE-LABEL: testv16i8u:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNISSE-NEXT: movdqa %xmm1, %xmm2
-; GFNISSE-NEXT: pshufb %xmm0, %xmm2
-; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; GFNISSE-NEXT: pxor %xmm3, %xmm3
-; GFNISSE-NEXT: pcmpeqb %xmm0, %xmm3
-; GFNISSE-NEXT: pand %xmm2, %xmm3
-; GFNISSE-NEXT: pshufb %xmm0, %xmm1
-; GFNISSE-NEXT: paddb %xmm3, %xmm1
+; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm2, %xmm0
+; GFNISSE-NEXT: pcmpeqd %xmm1, %xmm1
+; GFNISSE-NEXT: paddb %xmm0, %xmm1
+; GFNISSE-NEXT: pandn %xmm0, %xmm1
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm2, %xmm1
; GFNISSE-NEXT: movdqa %xmm1, %xmm0
; GFNISSE-NEXT: retq
;
-; GFNIAVX1OR2-LABEL: testv16i8u:
-; GFNIAVX1OR2: # %bb.0:
-; GFNIAVX1OR2-NEXT: vmovq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; GFNIAVX1OR2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX1OR2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; GFNIAVX1OR2-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
-; GFNIAVX1OR2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; GFNIAVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm0
-; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm2, %xmm0
-; GFNIAVX1OR2-NEXT: retq
+; GFNIAVX1-LABEL: testv16i8u:
+; GFNIAVX1: # %bb.0:
+; GFNIAVX1-NEXT: vmovddup {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX1-NEXT: # xmm1 = mem[0,0]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm2
+; GFNIAVX1-NEXT: vpandn %xmm0, %xmm2, %xmm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $8, %xmm1, %xmm0, %xmm0
+; GFNIAVX1-NEXT: retq
+;
+; GFNIAVX2-LABEL: testv16i8u:
+; GFNIAVX2: # %bb.0:
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; GFNIAVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm2
+; GFNIAVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
+; GFNIAVX2-NEXT: vgf2p8affineqb $8, %xmm1, %xmm0, %xmm0
+; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: testv16i8u:
; GFNIAVX512: # %bb.0:
-; GFNIAVX512-NEXT: vmovq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX512-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNIAVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; GFNIAVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
-; GFNIAVX512-NEXT: vpand %xmm3, %xmm2, %xmm2
-; GFNIAVX512-NEXT: vpshufb %xmm0, %xmm1, %xmm0
-; GFNIAVX512-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, %xmm1, %xmm0, %xmm0
+; GFNIAVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; GFNIAVX512-NEXT: vpaddb %xmm2, %xmm0, %xmm2
+; GFNIAVX512-NEXT: vpandn %xmm0, %xmm2, %xmm0
+; GFNIAVX512-NEXT: vgf2p8affineqb $8, %xmm1, %xmm0, %xmm0
; GFNIAVX512-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %in, i1 -1)
ret <16 x i8> %out
@@ -92,73 +100,52 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; GFNISSE-LABEL: testv32i8:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movq {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNISSE-NEXT: movdqa %xmm2, %xmm3
-; GFNISSE-NEXT: pshufb %xmm0, %xmm3
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
+; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
; GFNISSE-NEXT: gf2p8affineqb $0, %xmm4, %xmm0
-; GFNISSE-NEXT: pxor %xmm5, %xmm5
-; GFNISSE-NEXT: movdqa %xmm2, %xmm6
-; GFNISSE-NEXT: pshufb %xmm0, %xmm6
-; GFNISSE-NEXT: pcmpeqb %xmm5, %xmm0
-; GFNISSE-NEXT: pand %xmm3, %xmm0
-; GFNISSE-NEXT: paddb %xmm6, %xmm0
-; GFNISSE-NEXT: movdqa %xmm2, %xmm3
-; GFNISSE-NEXT: pshufb %xmm1, %xmm3
+; GFNISSE-NEXT: pcmpeqd %xmm2, %xmm2
+; GFNISSE-NEXT: movdqa %xmm0, %xmm3
+; GFNISSE-NEXT: paddb %xmm2, %xmm3
+; GFNISSE-NEXT: pandn %xmm0, %xmm3
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm4, %xmm3
; GFNISSE-NEXT: gf2p8affineqb $0, %xmm4, %xmm1
-; GFNISSE-NEXT: pcmpeqb %xmm1, %xmm5
-; GFNISSE-NEXT: pand %xmm3, %xmm5
-; GFNISSE-NEXT: pshufb %xmm1, %xmm2
-; GFNISSE-NEXT: paddb %xmm5, %xmm2
+; GFNISSE-NEXT: paddb %xmm1, %xmm2
+; GFNISSE-NEXT: pandn %xmm1, %xmm2
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm4, %xmm2
+; GFNISSE-NEXT: movdqa %xmm3, %xmm0
; GFNISSE-NEXT: movdqa %xmm2, %xmm1
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: testv32i8:
; GFNIAVX1: # %bb.0:
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; GFNIAVX1-NEXT: vmovq {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
-; GFNIAVX1-NEXT: vmovddup {{.*#+}} xmm4 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNIAVX1-NEXT: # xmm4 = mem[0,0]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; GFNIAVX1-NEXT: vpcmpeqb %xmm5, %xmm1, %xmm6
-; GFNIAVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
-; GFNIAVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
-; GFNIAVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm4
-; GFNIAVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm0
-; GFNIAVX1-NEXT: vpaddb %xmm0, %xmm3, %xmm0
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; GFNIAVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm3
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; GFNIAVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $8, %ymm1, %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: testv32i8:
; GFNIAVX2: # %bb.0:
-; GFNIAVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX2-NEXT: # ymm1 = mem[0,1,0,1]
-; GFNIAVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
-; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; GFNIAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; GFNIAVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
-; GFNIAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm0
-; GFNIAVX2-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm2
+; GFNIAVX2-NEXT: vpandn %ymm0, %ymm2, %ymm0
+; GFNIAVX2-NEXT: vgf2p8affineqb $8, %ymm1, %ymm0, %ymm0
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: testv32i8:
; GFNIAVX512: # %bb.0:
-; GFNIAVX512-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX512-NEXT: # ymm1 = mem[0,1,0,1]
-; GFNIAVX512-NEXT: vpshufb %ymm0, %ymm1, %ymm2
-; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; GFNIAVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; GFNIAVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
-; GFNIAVX512-NEXT: vpand %ymm3, %ymm2, %ymm2
-; GFNIAVX512-NEXT: vpshufb %ymm0, %ymm1, %ymm0
-; GFNIAVX512-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; GFNIAVX512-NEXT: vpaddb %ymm2, %ymm0, %ymm2
+; GFNIAVX512-NEXT: vpandn %ymm0, %ymm2, %ymm0
+; GFNIAVX512-NEXT: vgf2p8affineqb $8, %ymm1, %ymm0, %ymm0
; GFNIAVX512-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
ret <32 x i8> %out
@@ -167,73 +154,52 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; GFNISSE-LABEL: testv32i8u:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movq {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNISSE-NEXT: movdqa %xmm2, %xmm3
-; GFNISSE-NEXT: pshufb %xmm0, %xmm3
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
+; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
; GFNISSE-NEXT: gf2p8affineqb $0, %xmm4, %xmm0
-; GFNISSE-NEXT: pxor %xmm5, %xmm5
-; GFNISSE-NEXT: movdqa %xmm2, %xmm6
-; GFNISSE-NEXT: pshufb %xmm0, %xmm6
-; GFNISSE-NEXT: pcmpeqb %xmm5, %xmm0
-; GFNISSE-NEXT: pand %xmm3, %xmm0
-; GFNISSE-NEXT: paddb %xmm6, %xmm0
-; GFNISSE-NEXT: movdqa %xmm2, %xmm3
-; GFNISSE-NEXT: pshufb %xmm1, %xmm3
+; GFNISSE-NEXT: pcmpeqd %xmm2, %xmm2
+; GFNISSE-NEXT: movdqa %xmm0, %xmm3
+; GFNISSE-NEXT: paddb %xmm2, %xmm3
+; GFNISSE-NEXT: pandn %xmm0, %xmm3
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm4, %xmm3
; GFNISSE-NEXT: gf2p8affineqb $0, %xmm4, %xmm1
-; GFNISSE-NEXT: pcmpeqb %xmm1, %xmm5
-; GFNISSE-NEXT: pand %xmm3, %xmm5
-; GFNISSE-NEXT: pshufb %xmm1, %xmm2
-; GFNISSE-NEXT: paddb %xmm5, %xmm2
+; GFNISSE-NEXT: paddb %xmm1, %xmm2
+; GFNISSE-NEXT: pandn %xmm1, %xmm2
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm4, %xmm2
+; GFNISSE-NEXT: movdqa %xmm3, %xmm0
; GFNISSE-NEXT: movdqa %xmm2, %xmm1
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: testv32i8u:
; GFNIAVX1: # %bb.0:
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; GFNIAVX1-NEXT: vmovq {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
-; GFNIAVX1-NEXT: vmovddup {{.*#+}} xmm4 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNIAVX1-NEXT: # xmm4 = mem[0,0]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; GFNIAVX1-NEXT: vpcmpeqb %xmm5, %xmm1, %xmm6
-; GFNIAVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
-; GFNIAVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
-; GFNIAVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm4
-; GFNIAVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm0
-; GFNIAVX1-NEXT: vpaddb %xmm0, %xmm3, %xmm0
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; GFNIAVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm3
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; GFNIAVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $8, %ymm1, %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: testv32i8u:
; GFNIAVX2: # %bb.0:
-; GFNIAVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX2-NEXT: # ymm1 = mem[0,1,0,1]
-; GFNIAVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
-; GFNIAVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; GFNIAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; GFNIAVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
-; GFNIAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm0
-; GFNIAVX2-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm2
+; GFNIAVX2-NEXT: vpandn %ymm0, %ymm2, %ymm0
+; GFNIAVX2-NEXT: vgf2p8affineqb $8, %ymm1, %ymm0, %ymm0
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: testv32i8u:
; GFNIAVX512: # %bb.0:
-; GFNIAVX512-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX512-NEXT: # ymm1 = mem[0,1,0,1]
-; GFNIAVX512-NEXT: vpshufb %ymm0, %ymm1, %ymm2
-; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
-; GFNIAVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; GFNIAVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
-; GFNIAVX512-NEXT: vpand %ymm3, %ymm2, %ymm2
-; GFNIAVX512-NEXT: vpshufb %ymm0, %ymm1, %ymm0
-; GFNIAVX512-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; GFNIAVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, %ymm1, %ymm0, %ymm0
+; GFNIAVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; GFNIAVX512-NEXT: vpaddb %ymm2, %ymm0, %ymm2
+; GFNIAVX512-NEXT: vpandn %ymm0, %ymm2, %ymm0
+; GFNIAVX512-NEXT: vgf2p8affineqb $8, %ymm1, %ymm0, %ymm0
; GFNIAVX512-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
ret <32 x i8> %out
@@ -242,130 +208,88 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; GFNISSE-LABEL: testv64i8:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movdqa %xmm3, %xmm4
-; GFNISSE-NEXT: movq {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNISSE-NEXT: movdqa %xmm3, %xmm7
-; GFNISSE-NEXT: pshufb %xmm0, %xmm7
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm6 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm6, %xmm0
-; GFNISSE-NEXT: pxor %xmm5, %xmm5
-; GFNISSE-NEXT: movdqa %xmm3, %xmm8
-; GFNISSE-NEXT: pshufb %xmm0, %xmm8
-; GFNISSE-NEXT: pcmpeqb %xmm5, %xmm0
-; GFNISSE-NEXT: pand %xmm7, %xmm0
-; GFNISSE-NEXT: paddb %xmm8, %xmm0
-; GFNISSE-NEXT: movdqa %xmm3, %xmm7
-; GFNISSE-NEXT: pshufb %xmm1, %xmm7
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm6, %xmm1
-; GFNISSE-NEXT: movdqa %xmm3, %xmm8
-; GFNISSE-NEXT: pshufb %xmm1, %xmm8
-; GFNISSE-NEXT: pcmpeqb %xmm5, %xmm1
-; GFNISSE-NEXT: pand %xmm7, %xmm1
-; GFNISSE-NEXT: paddb %xmm8, %xmm1
-; GFNISSE-NEXT: movdqa %xmm3, %xmm7
-; GFNISSE-NEXT: pshufb %xmm2, %xmm7
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm6, %xmm2
-; GFNISSE-NEXT: movdqa %xmm3, %xmm8
-; GFNISSE-NEXT: pshufb %xmm2, %xmm8
-; GFNISSE-NEXT: pcmpeqb %xmm5, %xmm2
-; GFNISSE-NEXT: pand %xmm7, %xmm2
-; GFNISSE-NEXT: paddb %xmm8, %xmm2
-; GFNISSE-NEXT: movdqa %xmm3, %xmm7
-; GFNISSE-NEXT: pshufb %xmm4, %xmm7
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm6, %xmm4
-; GFNISSE-NEXT: pcmpeqb %xmm4, %xmm5
-; GFNISSE-NEXT: pand %xmm7, %xmm5
-; GFNISSE-NEXT: pshufb %xmm4, %xmm3
-; GFNISSE-NEXT: paddb %xmm5, %xmm3
+; GFNISSE-NEXT: movdqa {{.*#+}} xmm8 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm0
+; GFNISSE-NEXT: pcmpeqd %xmm4, %xmm4
+; GFNISSE-NEXT: movdqa %xmm0, %xmm5
+; GFNISSE-NEXT: paddb %xmm4, %xmm5
+; GFNISSE-NEXT: pandn %xmm0, %xmm5
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm8, %xmm5
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm1
+; GFNISSE-NEXT: movdqa %xmm1, %xmm6
+; GFNISSE-NEXT: paddb %xmm4, %xmm6
+; GFNISSE-NEXT: pandn %xmm1, %xmm6
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm8, %xmm6
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm2
+; GFNISSE-NEXT: movdqa %xmm2, %xmm7
+; GFNISSE-NEXT: paddb %xmm4, %xmm7
+; GFNISSE-NEXT: pandn %xmm2, %xmm7
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm8, %xmm7
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm3
+; GFNISSE-NEXT: paddb %xmm3, %xmm4
+; GFNISSE-NEXT: pandn %xmm3, %xmm4
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm8, %xmm4
+; GFNISSE-NEXT: movdqa %xmm5, %xmm0
+; GFNISSE-NEXT: movdqa %xmm6, %xmm1
+; GFNISSE-NEXT: movdqa %xmm7, %xmm2
+; GFNISSE-NEXT: movdqa %xmm4, %xmm3
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: testv64i8:
; GFNIAVX1: # %bb.0:
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; GFNIAVX1-NEXT: vmovq {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm4
-; GFNIAVX1-NEXT: vmovddup {{.*#+}} xmm5 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNIAVX1-NEXT: # xmm5 = mem[0,0]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm5, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; GFNIAVX1-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm7
-; GFNIAVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; GFNIAVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm4
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm5, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm7
-; GFNIAVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; GFNIAVX1-NEXT: vpaddb %xmm0, %xmm4, %xmm0
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm4
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm5, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm7
-; GFNIAVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; GFNIAVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm1, %xmm3, %xmm4
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm5, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm5
-; GFNIAVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpshufb %xmm1, %xmm3, %xmm1
-; GFNIAVX1-NEXT: vpaddb %xmm1, %xmm4, %xmm1
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; GFNIAVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm5
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
+; GFNIAVX1-NEXT: vandnps %ymm0, %ymm3, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $8, %ymm2, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; GFNIAVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm4
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
+; GFNIAVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1
+; GFNIAVX1-NEXT: vgf2p8affineqb $8, %ymm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: testv64i8:
; GFNIAVX2: # %bb.0:
-; GFNIAVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX2-NEXT: # ymm2 = mem[0,1,0,1]
-; GFNIAVX2-NEXT: vpshufb %ymm0, %ymm2, %ymm3
-; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm4, %ymm0, %ymm0
-; GFNIAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; GFNIAVX2-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm6
-; GFNIAVX2-NEXT: vpand %ymm6, %ymm3, %ymm3
-; GFNIAVX2-NEXT: vpshufb %ymm0, %ymm2, %ymm0
-; GFNIAVX2-NEXT: vpaddb %ymm0, %ymm3, %ymm0
-; GFNIAVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm3
-; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm4, %ymm1, %ymm1
-; GFNIAVX2-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm4
-; GFNIAVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
-; GFNIAVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm1
-; GFNIAVX2-NEXT: vpaddb %ymm1, %ymm3, %ymm1
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; GFNIAVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm4
+; GFNIAVX2-NEXT: vpandn %ymm0, %ymm4, %ymm0
+; GFNIAVX2-NEXT: vgf2p8affineqb $8, %ymm2, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
+; GFNIAVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm3
+; GFNIAVX2-NEXT: vpandn %ymm1, %ymm3, %ymm1
+; GFNIAVX2-NEXT: vgf2p8affineqb $8, %ymm2, %ymm1, %ymm1
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512VL-LABEL: testv64i8:
; GFNIAVX512VL: # %bb.0:
-; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; GFNIAVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
-; GFNIAVX512VL-NEXT: vpshufb %ymm1, %ymm2, %ymm3
-; GFNIAVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm4 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNIAVX512VL-NEXT: vgf2p8affineqb $0, %ymm4, %ymm1, %ymm1
-; GFNIAVX512VL-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; GFNIAVX512VL-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm6
-; GFNIAVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
-; GFNIAVX512VL-NEXT: vpshufb %ymm1, %ymm2, %ymm1
-; GFNIAVX512VL-NEXT: vpaddb %ymm1, %ymm3, %ymm1
-; GFNIAVX512VL-NEXT: vpshufb %ymm0, %ymm2, %ymm3
-; GFNIAVX512VL-NEXT: vgf2p8affineqb $0, %ymm4, %ymm0, %ymm0
-; GFNIAVX512VL-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm4
-; GFNIAVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
-; GFNIAVX512VL-NEXT: vpshufb %ymm0, %ymm2, %ymm0
-; GFNIAVX512VL-NEXT: vpaddb %ymm0, %ymm3, %ymm0
-; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; GFNIAVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX512VL-NEXT: vgf2p8affineqb $0, %zmm1, %zmm0, %zmm0
+; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; GFNIAVX512VL-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; GFNIAVX512VL-NEXT: vpaddb %ymm3, %ymm2, %ymm2
+; GFNIAVX512VL-NEXT: vpaddb %ymm3, %ymm0, %ymm3
+; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; GFNIAVX512VL-NEXT: vpandnq %zmm0, %zmm2, %zmm0
+; GFNIAVX512VL-NEXT: vgf2p8affineqb $8, %zmm1, %zmm0, %zmm0
; GFNIAVX512VL-NEXT: retq
;
; GFNIAVX512BW-LABEL: testv64i8:
; GFNIAVX512BW: # %bb.0:
-; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
-; GFNIAVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
-; GFNIAVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; GFNIAVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0 {%k1} {z}
-; GFNIAVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
-; GFNIAVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; GFNIAVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, %zmm1, %zmm0, %zmm0
+; GFNIAVX512BW-NEXT: vpternlogd {{.*#+}} zmm2 = -1
+; GFNIAVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm2
+; GFNIAVX512BW-NEXT: vpandnq %zmm0, %zmm2, %zmm0
+; GFNIAVX512BW-NEXT: vgf2p8affineqb $8, %zmm1, %zmm0, %zmm0
; GFNIAVX512BW-NEXT: retq
%out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
ret <64 x i8> %out
@@ -374,133 +298,92 @@ define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; GFNISSE-LABEL: testv64i8u:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: movdqa %xmm3, %xmm4
-; GFNISSE-NEXT: movq {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNISSE-NEXT: movdqa %xmm3, %xmm7
-; GFNISSE-NEXT: pshufb %xmm0, %xmm7
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm6 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm6, %xmm0
-; GFNISSE-NEXT: pxor %xmm5, %xmm5
-; GFNISSE-NEXT: movdqa %xmm3, %xmm8
-; GFNISSE-NEXT: pshufb %xmm0, %xmm8
-; GFNISSE-NEXT: pcmpeqb %xmm5, %xmm0
-; GFNISSE-NEXT: pand %xmm7, %xmm0
-; GFNISSE-NEXT: paddb %xmm8, %xmm0
-; GFNISSE-NEXT: movdqa %xmm3, %xmm7
-; GFNISSE-NEXT: pshufb %xmm1, %xmm7
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm6, %xmm1
-; GFNISSE-NEXT: movdqa %xmm3, %xmm8
-; GFNISSE-NEXT: pshufb %xmm1, %xmm8
-; GFNISSE-NEXT: pcmpeqb %xmm5, %xmm1
-; GFNISSE-NEXT: pand %xmm7, %xmm1
-; GFNISSE-NEXT: paddb %xmm8, %xmm1
-; GFNISSE-NEXT: movdqa %xmm3, %xmm7
-; GFNISSE-NEXT: pshufb %xmm2, %xmm7
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm6, %xmm2
-; GFNISSE-NEXT: movdqa %xmm3, %xmm8
-; GFNISSE-NEXT: pshufb %xmm2, %xmm8
-; GFNISSE-NEXT: pcmpeqb %xmm5, %xmm2
-; GFNISSE-NEXT: pand %xmm7, %xmm2
-; GFNISSE-NEXT: paddb %xmm8, %xmm2
-; GFNISSE-NEXT: movdqa %xmm3, %xmm7
-; GFNISSE-NEXT: pshufb %xmm4, %xmm7
-; GFNISSE-NEXT: gf2p8affineqb $0, %xmm6, %xmm4
-; GFNISSE-NEXT: pcmpeqb %xmm4, %xmm5
-; GFNISSE-NEXT: pand %xmm7, %xmm5
-; GFNISSE-NEXT: pshufb %xmm4, %xmm3
-; GFNISSE-NEXT: paddb %xmm5, %xmm3
+; GFNISSE-NEXT: movdqa {{.*#+}} xmm8 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm0
+; GFNISSE-NEXT: pcmpeqd %xmm4, %xmm4
+; GFNISSE-NEXT: movdqa %xmm0, %xmm5
+; GFNISSE-NEXT: paddb %xmm4, %xmm5
+; GFNISSE-NEXT: pandn %xmm0, %xmm5
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm8, %xmm5
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm1
+; GFNISSE-NEXT: movdqa %xmm1, %xmm6
+; GFNISSE-NEXT: paddb %xmm4, %xmm6
+; GFNISSE-NEXT: pandn %xmm1, %xmm6
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm8, %xmm6
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm2
+; GFNISSE-NEXT: movdqa %xmm2, %xmm7
+; GFNISSE-NEXT: paddb %xmm4, %xmm7
+; GFNISSE-NEXT: pandn %xmm2, %xmm7
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm8, %xmm7
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm3
+; GFNISSE-NEXT: paddb %xmm3, %xmm4
+; GFNISSE-NEXT: pandn %xmm3, %xmm4
+; GFNISSE-NEXT: gf2p8affineqb $8, %xmm8, %xmm4
+; GFNISSE-NEXT: movdqa %xmm5, %xmm0
+; GFNISSE-NEXT: movdqa %xmm6, %xmm1
+; GFNISSE-NEXT: movdqa %xmm7, %xmm2
+; GFNISSE-NEXT: movdqa %xmm4, %xmm3
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: testv64i8u:
; GFNIAVX1: # %bb.0:
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; GFNIAVX1-NEXT: vmovq {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm4
-; GFNIAVX1-NEXT: vmovddup {{.*#+}} xmm5 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNIAVX1-NEXT: # xmm5 = mem[0,0]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm5, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; GFNIAVX1-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm7
-; GFNIAVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; GFNIAVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm4
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm5, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm7
-; GFNIAVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; GFNIAVX1-NEXT: vpaddb %xmm0, %xmm4, %xmm0
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm4
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm5, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm7
-; GFNIAVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; GFNIAVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm1, %xmm3, %xmm4
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm5, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm5
-; GFNIAVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpshufb %xmm1, %xmm3, %xmm1
-; GFNIAVX1-NEXT: vpaddb %xmm1, %xmm4, %xmm1
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; GFNIAVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm5
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm5, %ymm3
+; GFNIAVX1-NEXT: vandnps %ymm0, %ymm3, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $8, %ymm2, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; GFNIAVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm4
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
+; GFNIAVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1
+; GFNIAVX1-NEXT: vgf2p8affineqb $8, %ymm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: testv64i8u:
; GFNIAVX2: # %bb.0:
-; GFNIAVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX2-NEXT: # ymm2 = mem[0,1,0,1]
-; GFNIAVX2-NEXT: vpshufb %ymm0, %ymm2, %ymm3
-; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm4, %ymm0, %ymm0
-; GFNIAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; GFNIAVX2-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm6
-; GFNIAVX2-NEXT: vpand %ymm6, %ymm3, %ymm3
-; GFNIAVX2-NEXT: vpshufb %ymm0, %ymm2, %ymm0
-; GFNIAVX2-NEXT: vpaddb %ymm0, %ymm3, %ymm0
-; GFNIAVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm3
-; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm4, %ymm1, %ymm1
-; GFNIAVX2-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm4
-; GFNIAVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
-; GFNIAVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm1
-; GFNIAVX2-NEXT: vpaddb %ymm1, %ymm3, %ymm1
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; GFNIAVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm4
+; GFNIAVX2-NEXT: vpandn %ymm0, %ymm4, %ymm0
+; GFNIAVX2-NEXT: vgf2p8affineqb $8, %ymm2, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
+; GFNIAVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm3
+; GFNIAVX2-NEXT: vpandn %ymm1, %ymm3, %ymm1
+; GFNIAVX2-NEXT: vgf2p8affineqb $8, %ymm2, %ymm1, %ymm1
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512VL-LABEL: testv64i8u:
; GFNIAVX512VL: # %bb.0:
-; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; GFNIAVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX512VL-NEXT: # ymm2 = mem[0,1,0,1]
-; GFNIAVX512VL-NEXT: vpshufb %ymm1, %ymm2, %ymm3
-; GFNIAVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm4 = [0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16,0,0,0,0,128,64,32,16]
-; GFNIAVX512VL-NEXT: vgf2p8affineqb $0, %ymm4, %ymm1, %ymm1
-; GFNIAVX512VL-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; GFNIAVX512VL-NEXT: vpcmpeqb %ymm5, %ymm1, %ymm6
-; GFNIAVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
-; GFNIAVX512VL-NEXT: vpshufb %ymm1, %ymm2, %ymm1
-; GFNIAVX512VL-NEXT: vpaddb %ymm1, %ymm3, %ymm1
-; GFNIAVX512VL-NEXT: vpshufb %ymm0, %ymm2, %ymm3
-; GFNIAVX512VL-NEXT: vgf2p8affineqb $0, %ymm4, %ymm0, %ymm0
-; GFNIAVX512VL-NEXT: vpcmpeqb %ymm5, %ymm0, %ymm4
-; GFNIAVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
-; GFNIAVX512VL-NEXT: vpshufb %ymm0, %ymm2, %ymm0
-; GFNIAVX512VL-NEXT: vpaddb %ymm0, %ymm3, %ymm0
-; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; GFNIAVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX512VL-NEXT: vgf2p8affineqb $0, %zmm1, %zmm0, %zmm0
+; GFNIAVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; GFNIAVX512VL-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; GFNIAVX512VL-NEXT: vpaddb %ymm3, %ymm2, %ymm2
+; GFNIAVX512VL-NEXT: vpaddb %ymm3, %ymm0, %ymm3
+; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; GFNIAVX512VL-NEXT: vpandnq %zmm0, %zmm2, %zmm0
+; GFNIAVX512VL-NEXT: vgf2p8affineqb $8, %zmm1, %zmm0, %zmm0
; GFNIAVX512VL-NEXT: retq
;
; GFNIAVX512BW-LABEL: testv64i8u:
; GFNIAVX512BW: # %bb.0:
-; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
-; GFNIAVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
-; GFNIAVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; GFNIAVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; GFNIAVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0 {%k1} {z}
-; GFNIAVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
-; GFNIAVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; GFNIAVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
+; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, %zmm1, %zmm0, %zmm0
+; GFNIAVX512BW-NEXT: vpternlogd {{.*#+}} zmm2 = -1
+; GFNIAVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm2
+; GFNIAVX512BW-NEXT: vpandnq %zmm0, %zmm2, %zmm0
+; GFNIAVX512BW-NEXT: vgf2p8affineqb $8, %zmm1, %zmm0, %zmm0
; GFNIAVX512BW-NEXT: retq
%out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
ret <64 x i8> %out
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFNIAVX: {{.*}}
+; GFNIAVX1OR2: {{.*}}
From 37707f26421f949558202458f05458da9ad31232 Mon Sep 17 00:00:00 2001
From: william <we3223 at gmail.com>
Date: Thu, 29 May 2025 11:11:04 +0800
Subject: [PATCH 2/2] Fix clang-format style issues
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 -
llvm/test/CodeGen/X86/ctlz-gfni.ll | 11 ++++++-----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7918a8e72adf6..6215b253374ab 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29040,7 +29040,6 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
unsigned Opc = Op.getOpcode();
-
if (VT.isVector() && VT.getScalarType() == MVT::i8 && Subtarget.hasGFNI())
return LowerVectorCTLZ_GFNI(Op, DAG, Subtarget);
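
As background for the regenerated checks above: a standard way to get a byte-wise
ctlz out of bit-matrix operations is the identity ctlz(x) == cttz(bitreverse(x)) -
bit-reverse the byte, isolate its lowest set bit, and map that single bit back to
an index, with an all-zero byte mapping to 8. That is the overall shape of the new
sequence (gf2p8affineqb, paddb/pandn, gf2p8affineqb with imm 8). A rough scalar
sketch of the identity follows, with plain C++ standing in for the two GF(2) affine
steps; the actual matrix constants used by the lowering are not reproduced here.

  // Scalar model only, not part of the patch: ctlz of one byte via
  // bitreverse + isolate-lowest-set-bit, zero byte -> 8.
  #include <cassert>
  #include <cstdint>

  static uint8_t bitrev8(uint8_t X) {
    uint8_t R = 0;
    for (int I = 0; I < 8; ++I)
      if (X & (1u << I))
        R |= (uint8_t)(1u << (7 - I));
    return R;
  }

  static uint8_t ctlz8(uint8_t X) {
    uint8_t Y = bitrev8(X);                   // trailing zeros of Y == leading zeros of X
    uint8_t Lowest = Y & (uint8_t)~(Y - 1);   // isolate lowest set bit; 0 stays 0
    if (!Lowest)
      return 8;                               // all-zero input byte
    uint8_t Idx = 0;
    while (!(Lowest & 1)) {                   // position of the isolated bit
      Lowest >>= 1;
      ++Idx;
    }
    return Idx;
  }

  int main() {
    assert(ctlz8(0x80) == 0);
    assert(ctlz8(0x01) == 7);
    assert(ctlz8(0x00) == 8);
    assert(ctlz8(0x13) == 3); // 0b00010011 has 3 leading zeros
    return 0;
  }

The asserts double as a small usage example of the model.
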
diff --git a/llvm/test/CodeGen/X86/ctlz-gfni.ll b/llvm/test/CodeGen/X86/ctlz-gfni.ll
index d942f5ad3506f..40007d87a6f7c 100644
--- a/llvm/test/CodeGen/X86/ctlz-gfni.ll
+++ b/llvm/test/CodeGen/X86/ctlz-gfni.ll
@@ -2,11 +2,12 @@
define <16 x i8> @test_ctlz_gfni(<16 x i8> %x) {
; CHECK-LABEL: @test_ctlz_gfni
-; CHECK: gf2p8affineqb
-; CHECK: paddb
-; CHECK: pandn
-; CHECK: gf2p8affineqb
-; CHECK: ret
+; CHECK: vgf2p8affineqb
+; CHECK: paddb
+; CHECK: pandn
+; CHECK: vgf2p8affineqb
+; CHECK: ret
+
%r = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x, i1 false)
ret <16 x i8> %r
}