[llvm] r258867 - [x86] make the subtarget member a const reference, not a pointer; NFCI
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 26 14:08:58 PST 2016
Author: spatel
Date: Tue Jan 26 16:08:58 2016
New Revision: 258867
URL: http://llvm.org/viewvc/llvm-project?rev=258867&view=rev
Log:
[x86] make the subtarget member a const reference, not a pointer; NFCI
It's passed in as a reference; it's not optional; it's not a pointer.
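For readers skimming the diff below, this is the whole pattern of the change: a constructor parameter that is already a non-null reference gets stored as a const reference member instead of a const pointer, so every use reads with '.' instead of '->' and the member type itself documents that it can never be null. A minimal standalone sketch of that pattern (illustrative Lowering/Cfg names only, not the actual LLVM sources):

    #include <iostream>

    struct Cfg {
      bool is64Bit() const { return true; }
    };

    // Before: the member is a pointer, even though the constructor
    // always receives a valid reference; every use needs '->'.
    class LoweringOld {
      const Cfg *STI;
    public:
      explicit LoweringOld(const Cfg &C) : STI(&C) {}
      bool wide() const { return STI->is64Bit(); }
    };

    // After: the member is a const reference; it cannot be null or
    // reseated, and uses read naturally with '.'.
    class LoweringNew {
      const Cfg &STI;
    public:
      explicit LoweringNew(const Cfg &C) : STI(C) {}
      bool wide() const { return STI.is64Bit(); }
    };

    int main() {
      Cfg C;
      std::cout << LoweringOld(C).wide() << ' ' << LoweringNew(C).wide() << '\n';
    }

The rest of the patch is the mechanical fallout of that one member-type change in X86ISelLowering.h: Subtarget-> becomes Subtarget. throughout X86ISelLowering.cpp, and a few static helpers switch their parameter from const X86Subtarget * to const X86Subtarget &.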
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=258867&r1=258866&r2=258867&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 26 16:08:58 2016
@@ -71,9 +71,9 @@ static cl::opt<bool> ExperimentalVectorW
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
- : TargetLowering(TM), Subtarget(&STI) {
- X86ScalarSSEf64 = Subtarget->hasSSE2();
- X86ScalarSSEf32 = Subtarget->hasSSE1();
+ : TargetLowering(TM), Subtarget(STI) {
+ X86ScalarSSEf64 = Subtarget.hasSSE2();
+ X86ScalarSSEf32 = Subtarget.hasSSE1();
MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the TargetLowering object.
@@ -86,24 +86,24 @@ X86TargetLowering::X86TargetLowering(con
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
- if (Subtarget->isAtom())
+ if (Subtarget.isAtom())
setSchedulingPreference(Sched::ILP);
- else if (Subtarget->is64Bit())
+ else if (Subtarget.is64Bit())
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides on Atom when compiling with O2.
if (TM.getOptLevel() >= CodeGenOpt::Default) {
- if (Subtarget->hasSlowDivide32())
+ if (Subtarget.hasSlowDivide32())
addBypassSlowDiv(32, 8);
- if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
+ if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
addBypassSlowDiv(64, 16);
}
- if (Subtarget->isTargetKnownWindowsMSVC()) {
+ if (Subtarget.isTargetKnownWindowsMSVC()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
@@ -117,11 +117,11 @@ X86TargetLowering::X86TargetLowering(con
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
}
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget.isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
setUseUnderscoreLongJmp(false);
- } else if (Subtarget->isTargetWindowsGNU()) {
+ } else if (Subtarget.isTargetWindowsGNU()) {
// MS runtime is weird: it exports _setjmp, but longjmp!
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(false);
@@ -134,7 +134,7 @@ X86TargetLowering::X86TargetLowering(con
addRegisterClass(MVT::i8, &X86::GR8RegClass);
addRegisterClass(MVT::i16, &X86::GR16RegClass);
addRegisterClass(MVT::i32, &X86::GR32RegClass);
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
addRegisterClass(MVT::i64, &X86::GR64RegClass);
for (MVT VT : MVT::integer_valuetypes())
@@ -164,14 +164,14 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
- if (Subtarget->is64Bit()) {
- if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
+ if (Subtarget.is64Bit()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
// f32/f64 are legal, f80 is custom.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
else
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
- } else if (!Subtarget->useSoftFloat()) {
+ } else if (!Subtarget.useSoftFloat()) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
@@ -185,7 +185,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!Subtarget->useSoftFloat()) {
+ if (!Subtarget.useSoftFloat()) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -205,7 +205,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
- if (!Subtarget->useSoftFloat()) {
+ if (!Subtarget.useSoftFloat()) {
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
@@ -231,8 +231,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
- if (Subtarget->is64Bit()) {
- if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
+ if (Subtarget.is64Bit()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
// FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
@@ -240,9 +240,9 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
}
- } else if (!Subtarget->useSoftFloat()) {
+ } else if (!Subtarget.useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
- if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
+ if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
@@ -260,12 +260,12 @@ X86TargetLowering::X86TargetLowering(con
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
- } else if (!Subtarget->is64Bit())
+ } else if (!Subtarget.is64Bit())
setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
@@ -311,14 +311,14 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
- if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
+ if (Subtarget.is32Bit() && Subtarget.isTargetKnownWindowsMSVC()) {
// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
// is. We should promote the value to 64-bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
@@ -338,19 +338,19 @@ X86TargetLowering::X86TargetLowering(con
AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
- if (Subtarget->hasBMI()) {
+ if (Subtarget.hasBMI()) {
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
}
- if (Subtarget->hasLZCNT()) {
+ if (Subtarget.hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
@@ -359,7 +359,7 @@ X86TargetLowering::X86TargetLowering(con
AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
@@ -368,7 +368,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
}
@@ -377,7 +377,7 @@ X86TargetLowering::X86TargetLowering(con
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
- if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
+ if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
@@ -395,19 +395,19 @@ X86TargetLowering::X86TargetLowering(con
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
- if (Subtarget->hasPOPCNT()) {
+ if (Subtarget.hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
- if (!Subtarget->hasMOVBE())
+ if (!Subtarget.hasMOVBE())
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
@@ -430,7 +430,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
setOperationAction(ISD::SETCCE , MVT::i64 , Custom);
@@ -450,11 +450,11 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
@@ -465,13 +465,13 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
- if (Subtarget->hasSSE1())
+ if (Subtarget.hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
@@ -483,13 +483,13 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
- if (Subtarget->hasCmpxchg16b()) {
+ if (Subtarget.hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
// FIXME - use subtarget debug flags
- if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
- !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
+ if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
+ !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64()) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
@@ -505,7 +505,7 @@ X86TargetLowering::X86TargetLowering(con
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
} else {
@@ -523,7 +523,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
- if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
+ if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
@@ -557,7 +557,7 @@ X86TargetLowering::X86TargetLowering(con
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
+ } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
@@ -592,7 +592,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
- } else if (!Subtarget->useSoftFloat()) {
+ } else if (!Subtarget.useSoftFloat()) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
@@ -626,8 +626,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87, except f128 in MMX.
- if (!Subtarget->useSoftFloat()) {
- if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
+ if (!Subtarget.useSoftFloat()) {
+ if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
addRegisterClass(MVT::f128, &X86::FR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
@@ -774,7 +774,7 @@ X86TargetLowering::X86TargetLowering(con
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
@@ -792,7 +792,7 @@ X86TargetLowering::X86TargetLowering(con
}
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
- if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -811,7 +811,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
}
- if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
@@ -908,7 +908,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
}
@@ -942,7 +942,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
// As there is no 64-bit GPR available, we need build a special custom
// sequence to convert from v2i32 to v2f32.
- if (!Subtarget->is64Bit())
+ if (!Subtarget.is64Bit())
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
@@ -956,7 +956,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
}
- if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
@@ -1020,13 +1020,13 @@ X86TargetLowering::X86TargetLowering(con
// FIXME: these should be Legal, but that's only for the case where
// the index is constant. For now custom expand to deal with that.
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
}
}
- if (Subtarget->hasSSE2()) {
+ if (Subtarget.hasSSE2()) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
@@ -1052,7 +1052,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
- if (Subtarget->hasXOP()) {
+ if (Subtarget.hasXOP()) {
setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
@@ -1063,7 +1063,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::ROTL, MVT::v4i64, Custom);
}
- if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
@@ -1162,7 +1162,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
- if (Subtarget->hasAnyFMA()) {
+ if (Subtarget.hasAnyFMA()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
@@ -1171,7 +1171,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FMA, MVT::f64, Legal);
}
- if (Subtarget->hasInt256()) {
+ if (Subtarget.hasInt256()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
setOperationAction(ISD::ADD, MVT::v16i16, Legal);
@@ -1289,7 +1289,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
- if (Subtarget->hasInt256())
+ if (Subtarget.hasInt256())
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
@@ -1307,7 +1307,7 @@ X86TargetLowering::X86TargetLowering(con
}
}
- if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
@@ -1388,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(con
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
- if (Subtarget->hasVLX()){
+ if (Subtarget.hasVLX()){
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
@@ -1411,7 +1411,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
- if (Subtarget->hasDQI()) {
+ if (Subtarget.hasDQI()) {
setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
@@ -1419,7 +1419,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
- if (Subtarget->hasVLX()) {
+ if (Subtarget.hasVLX()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
@@ -1430,7 +1430,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
}
}
- if (Subtarget->hasVLX()) {
+ if (Subtarget.hasVLX()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1452,7 +1452,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
- if (Subtarget->hasDQI()) {
+ if (Subtarget.hasDQI()) {
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
}
@@ -1524,7 +1524,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::OR, MVT::v16i32, Legal);
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
- if (Subtarget->hasCDI()) {
+ if (Subtarget.hasCDI()) {
setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand);
@@ -1542,7 +1542,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
- if (Subtarget->hasVLX()) {
+ if (Subtarget.hasVLX()) {
setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
@@ -1566,9 +1566,9 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
}
- } // Subtarget->hasCDI()
+ } // Subtarget.hasCDI()
- if (Subtarget->hasDQI()) {
+ if (Subtarget.hasDQI()) {
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
@@ -1617,7 +1617,7 @@ X86TargetLowering::X86TargetLowering(con
}
}// has AVX-512
- if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
@@ -1678,10 +1678,10 @@ X86TargetLowering::X86TargetLowering(con
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
- if (Subtarget->hasVLX())
+ if (Subtarget.hasVLX())
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
- if (Subtarget->hasCDI()) {
+ if (Subtarget.hasCDI()) {
setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand);
@@ -1704,7 +1704,7 @@ X86TargetLowering::X86TargetLowering(con
}
}
- if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
@@ -1744,7 +1744,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- if (!Subtarget->is64Bit()) {
+ if (!Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
}
@@ -1756,7 +1756,7 @@ X86TargetLowering::X86TargetLowering(con
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
- if (VT == MVT::i64 && !Subtarget->is64Bit())
+ if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
// Add/Sub/Mul with overflow operations are custom lowered.
setOperationAction(ISD::SADDO, VT, Custom);
@@ -1767,7 +1767,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::UMULO, VT, Custom);
}
- if (!Subtarget->is64Bit()) {
+ if (!Subtarget.is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
@@ -1775,10 +1775,10 @@ X86TargetLowering::X86TargetLowering(con
}
// Combine sin / cos into one node or libcall if possible.
- if (Subtarget->hasSinCos()) {
+ if (Subtarget.hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget.isTargetDarwin()) {
// For MacOSX, we don't want the normal expansion of a libcall to sincos.
// We want to issue a libcall to __sincos_stret to avoid memory traffic.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -1786,7 +1786,7 @@ X86TargetLowering::X86TargetLowering(con
}
}
- if (Subtarget->isTargetWin64()) {
+ if (Subtarget.isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
@@ -1832,7 +1832,7 @@ X86TargetLowering::X86TargetLowering(con
setTargetDAGCombine(ISD::MSCATTER);
setTargetDAGCombine(ISD::MGATHER);
- computeRegisterProperties(Subtarget->getRegisterInfo());
+ computeRegisterProperties(Subtarget.getRegisterInfo());
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
MaxStoresPerMemsetOptSize = 8;
@@ -1844,7 +1844,7 @@ X86TargetLowering::X86TargetLowering(con
// A predictable cmov does not hurt on an in-order CPU.
// FIXME: Use a CPU attribute to trigger this, not a CPU model.
- PredictableSelectIsExpensive = !Subtarget->isAtom();
+ PredictableSelectIsExpensive = !Subtarget.isAtom();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
@@ -1853,7 +1853,7 @@ X86TargetLowering::X86TargetLowering(con
// This has so far only been implemented for 64-bit MachO.
bool X86TargetLowering::useLoadStackGuardNode() const {
- return Subtarget->isTargetMachO() && Subtarget->is64Bit();
+ return Subtarget.isTargetMachO() && Subtarget.is64Bit();
}
TargetLoweringBase::LegalizeTypeAction
@@ -1869,21 +1869,21 @@ X86TargetLowering::getPreferredVectorAct
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
- return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
+ return Subtarget.hasAVX512() ? MVT::i1: MVT::i8;
if (VT.isSimple()) {
MVT VVT = VT.getSimpleVT();
const unsigned NumElts = VVT.getVectorNumElements();
const MVT EltVT = VVT.getVectorElementType();
if (VVT.is512BitVector()) {
- if (Subtarget->hasAVX512())
+ if (Subtarget.hasAVX512())
if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
EltVT == MVT::f32 || EltVT == MVT::f64)
switch(NumElts) {
case 8: return MVT::v8i1;
case 16: return MVT::v16i1;
}
- if (Subtarget->hasBWI())
+ if (Subtarget.hasBWI())
if (EltVT == MVT::i8 || EltVT == MVT::i16)
switch(NumElts) {
case 32: return MVT::v32i1;
@@ -1892,7 +1892,7 @@ EVT X86TargetLowering::getSetCCResultTyp
}
if (VVT.is256BitVector() || VVT.is128BitVector()) {
- if (Subtarget->hasVLX())
+ if (Subtarget.hasVLX())
if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
EltVT == MVT::f32 || EltVT == MVT::f64)
switch(NumElts) {
@@ -1900,7 +1900,7 @@ EVT X86TargetLowering::getSetCCResultTyp
case 4: return MVT::v4i1;
case 8: return MVT::v8i1;
}
- if (Subtarget->hasBWI() && Subtarget->hasVLX())
+ if (Subtarget.hasBWI() && Subtarget.hasVLX())
if (EltVT == MVT::i8 || EltVT == MVT::i16)
switch(NumElts) {
case 8: return MVT::v8i1;
@@ -1944,7 +1944,7 @@ static void getMaxByValAlign(Type *Ty, u
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = DL.getABITypeAlignment(Ty);
if (TyAlign > 8)
@@ -1953,7 +1953,7 @@ unsigned X86TargetLowering::getByValType
}
unsigned Align = 4;
- if (Subtarget->hasSSE1())
+ if (Subtarget.hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
}
@@ -1979,23 +1979,23 @@ X86TargetLowering::getOptimalMemOpType(u
if ((!IsMemset || ZeroMemset) &&
!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
- (!Subtarget->isUnalignedMem16Slow() ||
+ (!Subtarget.isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
if (Size >= 32) {
// FIXME: Check if unaligned 32-byte accesses are slow.
- if (Subtarget->hasInt256())
+ if (Subtarget.hasInt256())
return MVT::v8i32;
- if (Subtarget->hasFp256())
+ if (Subtarget.hasFp256())
return MVT::v8f32;
}
- if (Subtarget->hasSSE2())
+ if (Subtarget.hasSSE2())
return MVT::v4i32;
- if (Subtarget->hasSSE1())
+ if (Subtarget.hasSSE1())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
- !Subtarget->is64Bit() &&
- Subtarget->hasSSE2()) {
+ !Subtarget.is64Bit() &&
+ Subtarget.hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
@@ -2004,7 +2004,7 @@ X86TargetLowering::getOptimalMemOpType(u
// This is a compromise. If we reach here, unaligned accesses may be slow on
// this target. However, creating smaller, aligned accesses could be even
// slower and would certainly be a lot more code.
- if (Subtarget->is64Bit() && Size >= 8)
+ if (Subtarget.is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
}
@@ -2029,10 +2029,10 @@ X86TargetLowering::allowsMisalignedMemor
*Fast = true;
break;
case 128:
- *Fast = !Subtarget->isUnalignedMem16Slow();
+ *Fast = !Subtarget.isUnalignedMem16Slow();
break;
case 256:
- *Fast = !Subtarget->isUnalignedMem32Slow();
+ *Fast = !Subtarget.isUnalignedMem32Slow();
break;
// TODO: What about AVX-512 (512-bit) accesses?
}
@@ -2048,7 +2048,7 @@ unsigned X86TargetLowering::getJumpTable
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
+ Subtarget.isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
@@ -2056,7 +2056,7 @@ unsigned X86TargetLowering::getJumpTable
}
bool X86TargetLowering::useSoftFloat() const {
- return Subtarget->useSoftFloat();
+ return Subtarget.useSoftFloat();
}
const MCExpr *
@@ -2064,7 +2064,7 @@ X86TargetLowering::LowerCustomJumpTableE
const MachineBasicBlock *MBB,
unsigned uid,MCContext &Ctx) const{
assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT());
+ Subtarget.isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
return MCSymbolRefExpr::create(MBB->getSymbol(),
@@ -2074,7 +2074,7 @@ X86TargetLowering::LowerCustomJumpTableE
/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
- if (!Subtarget->is64Bit())
+ if (!Subtarget.is64Bit())
// This doesn't have SDLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
@@ -2088,7 +2088,7 @@ const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx) const {
// X86-64 uses RIP relative addressing based on the jump table label.
- if (Subtarget->isPICStyleRIPRel())
+ if (Subtarget.isPICStyleRIPRel())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
@@ -2104,7 +2104,7 @@ X86TargetLowering::findRepresentativeCla
default:
return TargetLowering::findRepresentativeClass(TRI, VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
- RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
+ RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
@@ -2122,10 +2122,10 @@ X86TargetLowering::findRepresentativeCla
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
- if (!Subtarget->isTargetLinux())
+ if (!Subtarget.isTargetLinux())
return false;
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
// %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
Offset = 0x28;
if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
@@ -2141,14 +2141,14 @@ bool X86TargetLowering::getStackCookieLo
}
Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
- if (!Subtarget->isTargetAndroid())
+ if (!Subtarget.isTargetAndroid())
return TargetLowering::getSafeStackPointerLocation(IRB);
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
unsigned AddressSpace, Offset;
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
// %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
Offset = 0x48;
if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
@@ -2243,14 +2243,14 @@ X86TargetLowering::LowerReturn(SDValue C
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+ (Subtarget.is64Bit() && !Subtarget.hasSSE2()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
@@ -2268,7 +2268,7 @@ X86TargetLowering::LowerReturn(SDValue C
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
@@ -2276,7 +2276,7 @@ X86TargetLowering::LowerReturn(SDValue C
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
- if (!Subtarget->hasSSE2())
+ if (!Subtarget.hasSSE2())
ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
}
}
@@ -2301,7 +2301,7 @@ X86TargetLowering::LowerReturn(SDValue C
getPointerTy(MF.getDataLayout()));
unsigned RetValReg
- = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
+ = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
X86::RAX : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
@@ -2311,7 +2311,7 @@ X86TargetLowering::LowerReturn(SDValue C
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
- const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
@@ -2379,7 +2379,7 @@ X86TargetLowering::getTypeForExtArgOrRet
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
- if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+ if (Subtarget.is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
ReturnMVT = MVT::i8;
else
ReturnMVT = MVT::i32;
@@ -2400,7 +2400,7 @@ X86TargetLowering::LowerCallResult(SDVal
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- bool Is64Bit = Subtarget->is64Bit();
+ bool Is64Bit = Subtarget.is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
@@ -2412,7 +2412,7 @@ X86TargetLowering::LowerCallResult(SDVal
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
- ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget.hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -2618,10 +2618,10 @@ X86TargetLowering::LowerMemArgument(SDVa
// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
- const X86Subtarget *Subtarget) {
- assert(Subtarget->is64Bit());
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.is64Bit());
- if (Subtarget->isCallingConvWin64(CallConv)) {
+ if (Subtarget.isCallingConvWin64(CallConv)) {
static const MCPhysReg GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
@@ -2637,9 +2637,9 @@ static ArrayRef<MCPhysReg> get64BitArgum
// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
CallingConv::ID CallConv,
- const X86Subtarget *Subtarget) {
- assert(Subtarget->is64Bit());
- if (Subtarget->isCallingConvWin64(CallConv)) {
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.is64Bit());
+ if (Subtarget.isCallingConvWin64(CallConv)) {
// The XMM registers which might contain var arg parameters are shadowed
// in their paired GPR. So we only need to save the GPR to their home
// slots.
@@ -2649,10 +2649,10 @@ static ArrayRef<MCPhysReg> get64BitArgum
const Function *Fn = MF.getFunction();
bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
- bool isSoftFloat = Subtarget->useSoftFloat();
+ bool isSoftFloat = Subtarget.useSoftFloat();
assert(!(isSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
+ if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
// Kernel mode asks for SSE to be disabled, so there are no XMM argument
// registers.
return None;
@@ -2670,17 +2670,17 @@ SDValue X86TargetLowering::LowerFormalAr
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
- Subtarget->isTargetCygMing() &&
+ Subtarget.isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool Is64Bit = Subtarget->is64Bit();
- bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
+ bool Is64Bit = Subtarget.is64Bit();
+ bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
@@ -2818,7 +2818,7 @@ SDValue X86TargetLowering::LowerFormalAr
}
// Figure out if XMM registers are in use.
- assert(!(Subtarget->useSoftFloat() &&
+ assert(!(Subtarget.useSoftFloat() &&
Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
"SSE register cannot be used when SSE is disabled!");
@@ -2830,7 +2830,7 @@ SDValue X86TargetLowering::LowerFormalAr
ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
- assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
// Gather all the live in physical registers.
@@ -2912,13 +2912,13 @@ SDValue X86TargetLowering::LowerFormalAr
// Find the largest legal vector type.
MVT VecVT = MVT::Other;
// FIXME: Only some x86_32 calling conventions support AVX512.
- if (Subtarget->hasAVX512() &&
+ if (Subtarget.hasAVX512() &&
(Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
CallConv == CallingConv::Intel_OCL_BI)))
VecVT = MVT::v16f32;
- else if (Subtarget->hasAVX())
+ else if (Subtarget.hasAVX())
VecVT = MVT::v8f32;
- else if (Subtarget->hasSSE2())
+ else if (Subtarget.hasSSE2())
VecVT = MVT::v4f32;
// We forward some GPRs and some vector types.
@@ -2959,8 +2959,8 @@ SDValue X86TargetLowering::LowerFormalAr
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
- !Subtarget->getTargetTriple().isOSMSVCRT() &&
- argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
+ !Subtarget.getTargetTriple().isOSMSVCRT() &&
+ argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -3078,9 +3078,9 @@ X86TargetLowering::LowerCall(TargetLower
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- bool Is64Bit = Subtarget->is64Bit();
- bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
- StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
+ bool Is64Bit = Subtarget.is64Bit();
+ bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
+ StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
@@ -3091,7 +3091,7 @@ X86TargetLowering::LowerCall(TargetLower
if (Attr.getValueAsString() == "true")
isTailCall = false;
- if (Subtarget->isPICStyleGOT() &&
+ if (Subtarget.isPICStyleGOT() &&
!MF.getTarget().Options.GuaranteedTailCallOpt) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
@@ -3194,7 +3194,7 @@ X86TargetLowering::LowerCall(TargetLower
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -3272,7 +3272,7 @@ X86TargetLowering::LowerCall(TargetLower
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
- if (Subtarget->isPICStyleGOT()) {
+ if (Subtarget.isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (!isTailCall) {
@@ -3313,7 +3313,7 @@ X86TargetLowering::LowerCall(TargetLower
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
- assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ assert((Subtarget.hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
@@ -3423,19 +3423,19 @@ X86TargetLowering::LowerCall(TargetLower
// external symbols most go through the PLT in PIC mode. If the symbol
// has hidden or protected visibility, or if it is static or local, then
// we don't need to use the PLT - we can directly call it.
- if (Subtarget->isTargetELF() &&
+ if (Subtarget.isTargetELF() &&
DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
- } else if (Subtarget->isPICStyleStubAny() &&
+ } else if (Subtarget.isPICStyleStubAny() &&
!GV->isStrongDefinitionForLinker() &&
- (!Subtarget->getTargetTriple().isMacOSX() ||
- Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ (!Subtarget.getTargetTriple().isMacOSX() ||
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
- } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
+ } else if (Subtarget.isPICStyleRIPRel() && isa<Function>(GV) &&
cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
@@ -3464,12 +3464,12 @@ X86TargetLowering::LowerCall(TargetLower
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to
// external symbols should go through the PLT.
- if (Subtarget->isTargetELF() &&
+ if (Subtarget.isTargetELF() &&
DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
- } else if (Subtarget->isPICStyleStubAny() &&
- (!Subtarget->getTargetTriple().isMacOSX() ||
- Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ } else if (Subtarget.isPICStyleStubAny() &&
+ (!Subtarget.getTargetTriple().isMacOSX() ||
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -3478,7 +3478,7 @@ X86TargetLowering::LowerCall(TargetLower
Callee = DAG.getTargetExternalSymbol(
S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
- } else if (Subtarget->isTarget64BitILP32() &&
+ } else if (Subtarget.isTarget64BitILP32() &&
Callee->getValueType(0) == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
@@ -3551,7 +3551,7 @@ X86TargetLowering::LowerCall(TargetLower
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
- !Subtarget->getTargetTriple().isOSMSVCRT() &&
+ !Subtarget.getTargetTriple().isOSMSVCRT() &&
SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
@@ -3613,8 +3613,8 @@ X86TargetLowering::LowerCall(TargetLower
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
@@ -3707,8 +3707,8 @@ bool X86TargetLowering::IsEligibleForTai
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
- bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
- bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
+ bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
+ bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
@@ -3727,7 +3727,7 @@ bool X86TargetLowering::IsEligibleForTai
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (RegInfo->needsStackRealignment(MF))
return false;
@@ -3829,7 +3829,7 @@ bool X86TargetLowering::IsEligibleForTai
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const X86InstrInfo *TII = Subtarget->getInstrInfo();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
@@ -3849,7 +3849,7 @@ bool X86TargetLowering::IsEligibleForTai
// only target EAX, EDX, or ECX since the tail call must be scheduled after
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
- if (!Subtarget->is64Bit() &&
+ if (!Subtarget.is64Bit() &&
((!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
@@ -3876,7 +3876,7 @@ bool X86TargetLowering::IsEligibleForTai
}
bool CalleeWillPop =
- X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg,
+ X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt);
if (unsigned BytesToPop =
@@ -3978,7 +3978,7 @@ static SDValue getTargetShuffleNode(unsi
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
@@ -4289,12 +4289,12 @@ bool X86TargetLowering::isExtractSubvect
bool X86TargetLowering::isCheapToSpeculateCttz() const {
// Speculate cttz only if we can directly use TZCNT.
- return Subtarget->hasBMI();
+ return Subtarget.hasBMI();
}
bool X86TargetLowering::isCheapToSpeculateCtlz() const {
// Speculate ctlz only if we can directly use LZCNT.
- return Subtarget->hasLZCNT();
+ return Subtarget.hasLZCNT();
}
/// Return true if every element in Mask, beginning
@@ -4474,7 +4474,7 @@ static SDValue getConstVector(ArrayRef<i
}
/// Returns a vector of specified type with all zero elements.
-static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget,
+static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
@@ -4482,7 +4482,7 @@ static SDValue getZeroVector(MVT VT, con
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.is128BitVector()) { // SSE
- if (Subtarget->hasSSE2()) { // SSE2
+ if (Subtarget.hasSSE2()) { // SSE2
SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
@@ -4490,7 +4490,7 @@ static SDValue getZeroVector(MVT VT, con
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.is256BitVector()) { // AVX
- if (Subtarget->hasInt256()) { // AVX2
+ if (Subtarget.hasInt256()) { // AVX2
SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
@@ -4508,9 +4508,9 @@ static SDValue getZeroVector(MVT VT, con
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
} else if (VT.getVectorElementType() == MVT::i1) {
- assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
+ assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16)
&& "Unexpected vector type");
- assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8)
+ assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8)
&& "Unexpected vector type");
SDValue Cst = DAG.getConstant(0, dl, MVT::i1);
SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst);
@@ -4756,7 +4756,7 @@ static SDValue Concat256BitVectors(SDVal
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget,
+static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDLoc dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Expected a 128/256/512-bit vector type");
@@ -4764,7 +4764,7 @@ static SDValue getOnesVector(EVT VT, con
APInt Ones = APInt::getAllOnesValue(32);
unsigned NumElts = VT.getSizeInBits() / 32;
SDValue Vec;
- if (!Subtarget->hasInt256() && NumElts == 8) {
+ if (!Subtarget.hasInt256() && NumElts == 8) {
Vec = DAG.getConstant(Ones, dl, MVT::v4i32);
Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
} else {
@@ -4803,7 +4803,7 @@ static SDValue getUnpackh(SelectionDAG &
/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool IsZero,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = V2.getSimpleValueType();
SDValue V1 = IsZero
@@ -5180,7 +5180,7 @@ static SDValue getShuffleScalarElt(SDNod
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
- const X86Subtarget* Subtarget,
+ const X86Subtarget &Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 8)
return SDValue();
@@ -5190,7 +5190,7 @@ static SDValue LowerBuildVectorv16i8(SDV
bool First = true;
// SSE4.1 - use PINSRB to insert each byte directly.
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
for (unsigned i = 0; i < 16; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
if (isNonZero) {
@@ -5250,7 +5250,7 @@ static SDValue LowerBuildVectorv16i8(SDV
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
- const X86Subtarget* Subtarget,
+ const X86Subtarget &Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 4)
return SDValue();
@@ -5279,7 +5279,7 @@ static SDValue LowerBuildVectorv8i16(SDV
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
const TargetLowering &TLI) {
// Find all zeroable elements.
std::bitset<4> Zeroable;
@@ -5343,7 +5343,7 @@ static SDValue LowerBuildVectorv4x32(SDV
}
// See if we can lower this build_vector to a INSERTPS.
- if (!Subtarget->hasSSE41())
+ if (!Subtarget.hasSSE41())
return SDValue();
SDValue V2 = Elt.getOperand(0);
@@ -5624,12 +5624,12 @@ static SDValue EltsFromConsecutiveLoads(
/// a scalar load, or a constant.
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
-static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
+static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// VBROADCAST requires AVX.
// TODO: Splats could be generated for non-AVX CPUs using SSE
// instructions, but there's less potential gain for only 128-bit vectors.
- if (!Subtarget->hasAVX())
+ if (!Subtarget.hasAVX())
return SDValue();
MVT VT = Op.getSimpleValueType();
@@ -5679,7 +5679,7 @@ static SDValue LowerVectorBroadcast(SDVa
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
Sc.getOpcode() != ISD::BUILD_VECTOR) {
- if (!Subtarget->hasInt256())
+ if (!Subtarget.hasInt256())
return SDValue();
// Use the register form of the broadcast instruction available on AVX2.
@@ -5697,7 +5697,7 @@ static SDValue LowerVectorBroadcast(SDVa
// Constants may have multiple users.
// AVX-512 has register version of the broadcast
- bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
+ bool hasRegVer = Subtarget.hasAVX512() && VT.is512BitVector() &&
Ld.getValueType().getSizeInBits() >= 32;
if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
!hasRegVer))
@@ -5722,7 +5722,7 @@ static SDValue LowerVectorBroadcast(SDVa
// from the constant pool and not to broadcast it from a scalar.
// But override that restriction when optimizing for size.
// TODO: Check if splatting is recommended for other AVX-capable CPUs.
- if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) {
+ if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
@@ -5731,7 +5731,7 @@ static SDValue LowerVectorBroadcast(SDVa
// with AVX2, also splat i8 and i16.
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
- (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) {
+ (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
@@ -5756,7 +5756,7 @@ static SDValue LowerVectorBroadcast(SDVa
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
// Handle AVX2 in-register broadcasts.
- if (!IsLoad && Subtarget->hasInt256() &&
+ if (!IsLoad && Subtarget.hasInt256() &&
(ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
@@ -5765,12 +5765,12 @@ static SDValue LowerVectorBroadcast(SDVa
return SDValue();
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
- (Subtarget->hasVLX() && ScalarSize == 64))
+ (Subtarget.hasVLX() && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
// double since there is no vbroadcastsd xmm
- if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
+ if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
@@ -6156,10 +6156,10 @@ static SDValue ExpandHorizontalBinOp(con
/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB
/// node.
static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
MVT VT = BV->getSimpleValueType(0);
- if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
- (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
+ if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
+ (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
return SDValue();
SDLoc DL(BV);
@@ -6258,7 +6258,7 @@ static SDValue LowerToAddSub(const Build
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = BV->getSimpleValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -6282,14 +6282,14 @@ static SDValue LowerToHorizontalOp(const
SDLoc DL(BV);
SDValue InVec0, InVec1;
- if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
+ if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
// Try to match an SSE3 float HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
- } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
+ } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
// Try to match an SSSE3 integer HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
@@ -6298,7 +6298,7 @@ static SDValue LowerToHorizontalOp(const
return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
}
- if (!Subtarget->hasAVX())
+ if (!Subtarget.hasAVX())
return SDValue();
if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
@@ -6346,7 +6346,7 @@ static SDValue LowerToHorizontalOp(const
if (CanFold) {
// Fold this build_vector into a single horizontal add/sub.
// Do this only if the target has AVX2.
- if (Subtarget->hasAVX2())
+ if (Subtarget.hasAVX2())
return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
// Do not try to expand this build_vector into a pair of horizontal
@@ -6364,7 +6364,7 @@ static SDValue LowerToHorizontalOp(const
}
if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
- VT == MVT::v16i16) && Subtarget->hasAVX()) {
+ VT == MVT::v16i16) && Subtarget.hasAVX()) {
unsigned X86Opcode;
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HADD;
@@ -6408,7 +6408,7 @@ static SDValue materializeVectorConstant
if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
- return getZeroVector(VT, &Subtarget, DAG, DL);
+ return getZeroVector(VT, Subtarget, DAG, DL);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
@@ -6419,7 +6419,7 @@ static SDValue materializeVectorConstant
return Op;
if (!VT.is512BitVector())
- return getOnesVector(VT, &Subtarget, DAG, DL);
+ return getOnesVector(VT, Subtarget, DAG, DL);
}
return SDValue();
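The constant materialization above gates on 256-bit integer support: with it, an all-ones vector is emitted as one wide constant; without it, two 128-bit halves are built and concatenated. A minimal standalone sketch of that shape (illustrative names only, not the LLVM API; plain std::vector stands in for SDValue, and the subtarget-like object is passed by const reference as in this patch):

#include <cstdint>
#include <vector>

struct FeatureInfo {
  bool Int256 = false;                 // stand-in for an AVX2-style 256-bit integer feature
  bool hasInt256() const { return Int256; }
};

// Build a 256-bit all-ones value either directly (8 x i32) or as two
// concatenated 4 x i32 halves, mirroring the split in getOnesVector.
static std::vector<uint32_t> makeOnes256(const FeatureInfo &ST) {
  if (ST.hasInt256())
    return std::vector<uint32_t>(8, 0xFFFFFFFFu);   // single wide constant
  std::vector<uint32_t> Half(4, 0xFFFFFFFFu);       // one 128-bit half
  std::vector<uint32_t> Out(Half);
  Out.insert(Out.end(), Half.begin(), Half.end());  // concat the two halves
  return Out;
}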
@@ -6434,10 +6434,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
unsigned NumElems = Op.getNumOperands();
// Generate vectors for predicate vectors.
- if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512())
+ if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG);
- if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, *Subtarget))
+ if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
return VectorConstant;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
@@ -6486,7 +6486,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
// insertion that way. Only do this if the value is non-constant or if the
// value is a constant being inserted into element 0. It is cheaper to do
// a constant pool load than it is to do a movd + shuffle.
- if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
+ if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle SSE only.
@@ -6511,7 +6511,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
- (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
+ (ExtVT == MVT::i64 && Subtarget.is64Bit())) {
if (VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
@@ -6529,7 +6529,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
if (VT.is256BitVector()) {
- if (Subtarget->hasAVX()) {
+ if (Subtarget.hasAVX()) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
} else {
@@ -6697,7 +6697,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -6774,7 +6774,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SD
}
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG & DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
@@ -6851,7 +6851,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(S
}
static SDValue LowerCONCAT_VECTORS(SDValue Op,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() == MVT::i1)
@@ -7177,7 +7177,7 @@ static SDValue lowerVectorShuffleAsBitBl
/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Original,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
@@ -7240,13 +7240,13 @@ static SDValue lowerVectorShuffleAsBlend
case MVT::v4i64:
case MVT::v8i32:
- assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
// FALLTHROUGH
case MVT::v2i64:
case MVT::v4i32:
// If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
// that instruction.
- if (Subtarget->hasAVX2()) {
+ if (Subtarget.hasAVX2()) {
// Scale the blend by the number of 32-bit dwords per element.
int Scale = VT.getScalarSizeInBits() / 32;
BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
@@ -7271,7 +7271,7 @@ static SDValue lowerVectorShuffleAsBlend
}
case MVT::v16i16: {
- assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// We can lower these with PBLENDW which is mirrored across 128-bit lanes.
@@ -7287,7 +7287,7 @@ static SDValue lowerVectorShuffleAsBlend
// FALLTHROUGH
case MVT::v16i8:
case MVT::v32i8: {
- assert((VT.is128BitVector() || Subtarget->hasAVX2()) &&
+ assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
@@ -7425,7 +7425,7 @@ static SDValue lowerVectorShuffleAsDecom
static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
SDValue V2,
ArrayRef<int> Mask,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
@@ -7503,7 +7503,7 @@ static SDValue lowerVectorShuffleAsByteR
int Scale = 16 / NumLaneElts;
// SSSE3 targets can use the palignr instruction.
- if (Subtarget->hasSSSE3()) {
+ if (Subtarget.hasSSSE3()) {
// Cast the inputs to i8 vector of correct length to match PALIGNR.
MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
Lo = DAG.getBitcast(AlignVT, Lo);
@@ -7767,7 +7767,7 @@ static SDValue lowerVectorShuffleWithSSE
/// the same lane.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
SDLoc DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
- ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
int EltBits = VT.getScalarSizeInBits();
int NumElements = VT.getVectorNumElements();
@@ -7800,7 +7800,7 @@ static SDValue lowerVectorShuffleAsSpeci
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
// Not worth offsetting 128-bit vectors if scale == 2, a pattern using
// PUNPCK will catch this in a later shuffle match.
if (Offset && Scale == 2 && VT.is128BitVector())
@@ -7839,7 +7839,7 @@ static SDValue lowerVectorShuffleAsSpeci
// The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
// to 64-bits.
- if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
+ if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
assert(VT.is128BitVector() && "Unexpected vector width!");
@@ -7865,7 +7865,7 @@ static SDValue lowerVectorShuffleAsSpeci
// If this would require more than 2 unpack instructions to expand, use
// pshufb when available. We can only use more than 2 unpack instructions
// when zero extending i8 elements which also makes it easier to use pshufb.
- if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) {
+ if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) {
assert(NumElements == 16 && "Unexpected byte vector width!");
SDValue PSHUFBMask[16];
for (int i = 0; i < 16; ++i) {
@@ -7925,7 +7925,7 @@ static SDValue lowerVectorShuffleAsSpeci
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
int Bits = VT.getSizeInBits();
@@ -8084,7 +8084,7 @@ static bool isShuffleFoldableLoad(SDValu
/// across all subtarget feature sets.
static SDValue lowerVectorShuffleAsElementInsertion(
SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
@@ -8141,7 +8141,7 @@ static SDValue lowerVectorShuffleAsEleme
// This is essentially a special case blend operation, but if we have
// general purpose blend operations, they are always faster. Bail and let
// the rest of the lowering handle these as blends.
- if (Subtarget->hasSSE41())
+ if (Subtarget.hasSSE41())
return SDValue();
// Otherwise, use MOVSD or MOVSS.
@@ -8187,9 +8187,9 @@ static SDValue lowerVectorShuffleAsEleme
/// This assumes we have AVX2.
static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0,
int BroadcastIdx,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Subtarget->hasAVX2() &&
+ assert(Subtarget.hasAVX2() &&
"We can only lower integer broadcasts with AVX2!");
EVT EltVT = VT.getVectorElementType();
@@ -8242,11 +8242,11 @@ static SDValue lowerVectorShuffleAsTrunc
/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them?
static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
ArrayRef<int> Mask,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- if (!Subtarget->hasAVX())
+ if (!Subtarget.hasAVX())
return SDValue();
- if (VT.isInteger() && !Subtarget->hasAVX2())
+ if (VT.isInteger() && !Subtarget.hasAVX2())
return SDValue();
// Check that the mask is a broadcast.
@@ -8317,11 +8317,11 @@ static SDValue lowerVectorShuffleAsBroad
// If the scalar isn't a load, we can't broadcast from it in AVX1.
// Only AVX2 has register broadcasts.
- if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
+ if (!Subtarget.hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
} else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
- if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64)
+ if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64)
BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
// If we are broadcasting a load that is only used by the shuffle
@@ -8337,7 +8337,7 @@ static SDValue lowerVectorShuffleAsBroad
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
- } else if (!Subtarget->hasAVX2()) {
+ } else if (!Subtarget.hasAVX2()) {
// We can't broadcast from a vector register without AVX2.
return SDValue();
} else if (BroadcastIdx != 0) {
@@ -8567,7 +8567,7 @@ static SDValue lowerVectorShuffleAsPermu
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!");
@@ -8579,7 +8579,7 @@ static SDValue lowerV2F64VectorShuffle(S
if (isSingleInputShuffleMask(Mask)) {
// Use low duplicate instructions for masks that match their pattern.
- if (Subtarget->hasSSE3())
+ if (Subtarget.hasSSE3())
if (isShuffleEquivalent(V1, V2, Mask, {0, 0}))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);
@@ -8587,7 +8587,7 @@ static SDValue lowerV2F64VectorShuffle(S
// single input as both of the "inputs" to this instruction.
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
- if (Subtarget->hasAVX()) {
+ if (Subtarget.hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
@@ -8626,7 +8626,7 @@ static SDValue lowerV2F64VectorShuffle(S
DL, MVT::v2f64, V2,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
- if (Subtarget->hasSSE41())
+ if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -8648,7 +8648,7 @@ static SDValue lowerV2F64VectorShuffle(S
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!");
@@ -8719,7 +8719,7 @@ static SDValue lowerV2I64VectorShuffle(S
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
- bool IsBlendSupported = Subtarget->hasSSE41();
+ bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG))
@@ -8732,7 +8732,7 @@ static SDValue lowerV2I64VectorShuffle(S
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
- if (Subtarget->hasSSSE3())
+ if (Subtarget.hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Rotate;
@@ -8867,7 +8867,7 @@ static SDValue lowerVectorShuffleWithSHU
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
@@ -8887,14 +8887,14 @@ static SDValue lowerV4F32VectorShuffle(S
return Broadcast;
// Use even/odd duplicate instructions for masks that match their pattern.
- if (Subtarget->hasSSE3()) {
+ if (Subtarget.hasSSE3()) {
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3}))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
}
- if (Subtarget->hasAVX()) {
+ if (Subtarget.hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
@@ -8917,7 +8917,7 @@ static SDValue lowerV4F32VectorShuffle(S
Mask, Subtarget, DAG))
return V;
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -8946,7 +8946,7 @@ static SDValue lowerV4F32VectorShuffle(S
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!");
@@ -9001,7 +9001,7 @@ static SDValue lowerV4I32VectorShuffle(S
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
- bool IsBlendSupported = Subtarget->hasSSE41();
+ bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG))
@@ -9018,7 +9018,7 @@ static SDValue lowerV4I32VectorShuffle(S
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
- if (Subtarget->hasSSSE3())
+ if (Subtarget.hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
@@ -9063,7 +9063,7 @@ static SDValue lowerV4I32VectorShuffle(S
/// vector, form the analogous 128-bit 8-element Mask.
static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
SDLoc DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
@@ -9580,7 +9580,7 @@ static SDValue lowerVectorShuffleAsPSHUF
/// halves of the inputs separately (making them have relatively few inputs)
/// and then concatenate them.
static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!");
@@ -9641,7 +9641,7 @@ static SDValue lowerV8I16VectorShuffle(S
return Shift;
// See if we can use SSE4A Extraction / Insertion.
- if (Subtarget->hasSSE4A())
+ if (Subtarget.hasSSE4A())
if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG))
return V;
@@ -9653,7 +9653,7 @@ static SDValue lowerV8I16VectorShuffle(S
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
- bool IsBlendSupported = Subtarget->hasSSE41();
+ bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Subtarget, DAG))
@@ -9683,7 +9683,7 @@ static SDValue lowerV8I16VectorShuffle(S
// If we can't directly blend but can use PSHUFB, that will be better as it
// can both shuffle and set up the inefficient blend.
- if (!IsBlendSupported && Subtarget->hasSSSE3()) {
+ if (!IsBlendSupported && Subtarget.hasSSSE3()) {
bool V1InUse, V2InUse;
return lowerVectorShuffleAsPSHUFB(DL, MVT::v8i16, V1, V2, Mask, DAG,
V1InUse, V2InUse);
@@ -9771,7 +9771,7 @@ static int canLowerByDroppingEvenElement
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
/// back together.
static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");
@@ -9797,7 +9797,7 @@ static SDValue lowerV16I8VectorShuffle(S
return ZExt;
// See if we can use SSE4A Extraction / Insertion.
- if (Subtarget->hasSSE4A())
+ if (Subtarget.hasSSE4A())
if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG))
return V;
@@ -9924,7 +9924,7 @@ static SDValue lowerV16I8VectorShuffle(S
// FIXME: The only exceptions to the above are blends which are exact
// interleavings with direct instructions supporting them. We currently don't
// handle those well here.
- if (Subtarget->hasSSSE3()) {
+ if (Subtarget.hasSSSE3()) {
bool V1InUse = false;
bool V2InUse = false;
@@ -9935,7 +9935,7 @@ static SDValue lowerV16I8VectorShuffle(S
// do so. This avoids using them to handle blends-with-zero which is
// important as a single pshufb is significantly faster for that.
if (V1InUse && V2InUse) {
- if (Subtarget->hasSSE41())
+ if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2,
Mask, Subtarget, DAG))
return Blend;
@@ -10064,7 +10064,7 @@ static SDValue lowerV16I8VectorShuffle(S
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- MVT VT, const X86Subtarget *Subtarget,
+ MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
switch (VT.SimpleTy) {
case MVT::v2i64:
@@ -10382,7 +10382,7 @@ static SDValue lowerVectorShuffleAsLaneP
/// \brief Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// TODO: If minimizing size and one of the inputs is a zero vector and the
// zero vector has only one use, we could use a VPERM2X128 to save the
@@ -10475,7 +10475,7 @@ static SDValue lowerV2X128VectorShuffle(
/// those are still *marginally* more expensive.
static SDValue lowerVectorShuffleByMerging128BitLanes(
SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!isSingleInputShuffleMask(Mask) &&
"This is only useful with multiple inputs.");
@@ -10549,7 +10549,7 @@ static SDValue lowerVectorShuffleByMergi
/// or shuffling smaller vector types which can lower more efficiently.
static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(VT.is256BitVector() && "Expected 256-bit vector");
@@ -10635,7 +10635,7 @@ static SDValue lowerVectorShuffleWithUnd
return SDValue();
// AVX2 - XXXXuuuu - always extract lowers.
- if (Subtarget->hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
+ if (Subtarget.hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
// AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
if (VT == MVT::v4f64 || VT == MVT::v4i64)
return SDValue();
@@ -10714,7 +10714,7 @@ static SDValue lowerVectorShuffleWithSHU
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
@@ -10748,7 +10748,7 @@ static SDValue lowerV4F64VectorShuffle(S
}
// With AVX2 we have direct support for this permutation.
- if (Subtarget->hasAVX2())
+ if (Subtarget.hasAVX2())
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
@@ -10775,7 +10775,7 @@ static SDValue lowerV4F64VectorShuffle(S
// shuffle. However, if we have AVX2 and either inputs are already in place,
// we will be able to shuffle even across lanes the other input in a single
// instruction so skip this pattern.
- if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
+ if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
isShuffleMaskInputInPlace(1, Mask))))
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
@@ -10783,7 +10783,7 @@ static SDValue lowerV4F64VectorShuffle(S
// If we have AVX2 then we always want to lower with a blend because at v4 we
// can fully permute the elements.
- if (Subtarget->hasAVX2())
+ if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
Mask, DAG);
@@ -10796,7 +10796,7 @@ static SDValue lowerV4F64VectorShuffle(S
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v4i64 shuffling.
static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
@@ -10804,7 +10804,7 @@ static SDValue lowerV4I64VectorShuffle(S
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
- assert(Subtarget->hasAVX2() && "We can only lower v4i64 with AVX2!");
+ assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
@@ -10859,7 +10859,7 @@ static SDValue lowerV4I64VectorShuffle(S
// shuffle. However, if we have AVX2 and either inputs are already in place,
// we will be able to shuffle even across lanes the other input in a single
// instruction so skip this pattern.
- if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
+ if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
isShuffleMaskInputInPlace(1, Mask))))
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
@@ -10875,7 +10875,7 @@ static SDValue lowerV4I64VectorShuffle(S
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
@@ -10936,7 +10936,7 @@ static SDValue lowerV8F32VectorShuffle(S
X86ISD::VPERMILPV, DL, MVT::v8f32, V1,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask));
- if (Subtarget->hasAVX2())
+ if (Subtarget.hasAVX2())
return DAG.getNode(
X86ISD::VPERMV, DL, MVT::v8f32,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
@@ -10954,7 +10954,7 @@ static SDValue lowerV8F32VectorShuffle(S
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
- if (Subtarget->hasAVX2())
+ if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
Mask, DAG);
@@ -10967,7 +10967,7 @@ static SDValue lowerV8F32VectorShuffle(S
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v8i32 shuffling.
static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
@@ -10975,7 +10975,7 @@ static SDValue lowerV8I32VectorShuffle(S
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!");
+ assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
@@ -11047,7 +11047,7 @@ static SDValue lowerV8I32VectorShuffle(S
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v16i16 shuffling.
static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
@@ -11055,7 +11055,7 @@ static SDValue lowerV16I16VectorShuffle(
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- assert(Subtarget->hasAVX2() && "We can only lower v16i16 with AVX2!");
+ assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
@@ -11138,7 +11138,7 @@ static SDValue lowerV16I16VectorShuffle(
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v32i8 shuffling.
static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
@@ -11146,7 +11146,7 @@ static SDValue lowerV32I8VectorShuffle(S
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
- assert(Subtarget->hasAVX2() && "We can only lower v32i8 with AVX2!");
+ assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
@@ -11215,7 +11215,7 @@ static SDValue lowerV32I8VectorShuffle(S
/// shuffle or splits it into two 128-bit shuffles and fuses the results back
/// together based on the available instructions.
static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- MVT VT, const X86Subtarget *Subtarget,
+ MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -11244,7 +11244,7 @@ static SDValue lower256BitVectorShuffle(
// essentially *zero* ability to manipulate a 256-bit vector with integer
// types. Since we'll use floating point types there eventually, just
// immediately cast everything to a float and operate entirely in that domain.
- if (VT.isInteger() && !Subtarget->hasAVX2()) {
+ if (VT.isInteger() && !Subtarget.hasAVX2()) {
int ElementBits = VT.getScalarSizeInBits();
if (ElementBits < 32)
// No floating point type available, decompose into 128-bit vectors.
@@ -11329,7 +11329,7 @@ static SDValue lowerVectorShuffleWithPER
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
@@ -11351,7 +11351,7 @@ static SDValue lowerV8F64VectorShuffle(S
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
@@ -11369,7 +11369,7 @@ static SDValue lowerV16F32VectorShuffle(
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
@@ -11391,7 +11391,7 @@ static SDValue lowerV8I64VectorShuffle(S
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
@@ -11409,7 +11409,7 @@ static SDValue lowerV16I32VectorShuffle(
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
@@ -11417,14 +11417,14 @@ static SDValue lowerV32I16VectorShuffle(
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
- assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
+ assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
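The v32i16 handler above requires AVX-512 BWI; the v64i8 handler below still splits via splitAndLowerVectorShuffle, and the 512-bit dispatch later only takes those paths when Subtarget.hasBWI(). A compilable sketch of that feature-gated split-and-recombine control flow (the lane reversal is a placeholder operation for illustration, not an equivalent shuffle lowering):

#include <vector>

struct FeatureInfo {
  bool BWI = false;                    // stand-in for AVX-512 BW support
  bool hasBWI() const { return BWI; }
};

using Vec = std::vector<int>;

static Vec reverseLanes(const Vec &V) {            // placeholder "wide" operation
  return Vec(V.rbegin(), V.rend());
}

static Vec lowerShuffle(const Vec &V, const FeatureInfo &ST) {
  if (ST.hasBWI())
    return reverseLanes(V);                        // one wide operation when legal
  // No wide form: split into halves, process each, and fuse the results back.
  Vec Lo(V.begin(), V.begin() + V.size() / 2);
  Vec Hi(V.begin() + V.size() / 2, V.end());
  Vec Out = reverseLanes(Lo);
  Vec HiOut = reverseLanes(Hi);
  Out.insert(Out.end(), HiOut.begin(), HiOut.end());
  return Out;
}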
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
@@ -11432,7 +11432,7 @@ static SDValue lowerV64I8VectorShuffle(S
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
- assert(Subtarget->hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
+ assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
// FIXME: Implement direct support for this type!
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
@@ -11444,12 +11444,12 @@ static SDValue lowerV64I8VectorShuffle(S
/// shuffle or splits it into two 256-bit shuffles and fuses the results back
/// together based on the available instructions.
static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- MVT VT, const X86Subtarget *Subtarget,
+ MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
- assert(Subtarget->hasAVX512() &&
+ assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/ basic ISA!");
// Check for being able to broadcast a single element.
@@ -11471,11 +11471,11 @@ static SDValue lower512BitVectorShuffle(
case MVT::v16i32:
return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
case MVT::v32i16:
- if (Subtarget->hasBWI())
+ if (Subtarget.hasBWI())
return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
break;
case MVT::v64i8:
- if (Subtarget->hasBWI())
+ if (Subtarget.hasBWI())
return lowerV64I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
break;
@@ -11492,12 +11492,12 @@ static SDValue lower512BitVectorShuffle(
// The only way to shuffle bits is to sign-extend the mask vector to SIMD
// vector, shuffle and then truncate it back.
static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- MVT VT, const X86Subtarget *Subtarget,
+ MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
- assert(Subtarget->hasAVX512() &&
+ assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/o basic ISA!");
MVT ExtVT;
switch (VT.SimpleTy) {
@@ -11548,7 +11548,7 @@ static SDValue lower1BitVectorShuffle(SD
/// above in helper routines. The canonicalization attempts to widen shuffles
/// to involve fewer lanes of wider elements, consolidate symmetric patterns
/// s.t. only one of the two inputs needs to be tested, etc.
-static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
@@ -11729,7 +11729,7 @@ static bool BUILD_VECTORtoBlendMask(Buil
/// \brief Try to lower a VSELECT instruction to a vector shuffle.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
@@ -11767,7 +11767,7 @@ SDValue X86TargetLowering::LowerVSELECT(
return BlendOp;
// Variable blends are only legal from SSE4.1 onward.
- if (!Subtarget->hasSSE41())
+ if (!Subtarget.hasSSE41())
return SDValue();
// Only some types will be legal on some subtargets. If we can emit a legal
@@ -11780,7 +11780,7 @@ SDValue X86TargetLowering::LowerVSELECT(
case MVT::v32i8:
// The byte blends for AVX vectors were introduced only in AVX2.
- if (Subtarget->hasAVX2())
+ if (Subtarget.hasAVX2())
return Op;
return SDValue();
@@ -11788,7 +11788,7 @@ SDValue X86TargetLowering::LowerVSELECT(
case MVT::v8i16:
case MVT::v16i16:
// AVX-512 BWI and VLX features support VSELECT with i16 elements.
- if (Subtarget->hasBWI() && Subtarget->hasVLX())
+ if (Subtarget.hasBWI() && Subtarget.hasVLX())
return Op;
// FIXME: We should custom lower this by fixing the condition and using i8
@@ -11866,7 +11866,7 @@ X86TargetLowering::ExtractBitFromMaskVec
MVT EltVT = Op.getSimpleValueType();
assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
- assert((VecVT.getVectorNumElements() <= 16 || Subtarget->hasBWI()) &&
+ assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
"Unexpected vector type in ExtractBitFromMaskVector");
// variable index can't be handled in mask registers,
@@ -11881,7 +11881,7 @@ X86TargetLowering::ExtractBitFromMaskVec
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
const TargetRegisterClass* rc = getRegClassFor(VecVT);
- if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8))
+ if (!Subtarget.hasDQI() && (VecVT.getVectorNumElements() <= 8))
rc = getRegClassFor(MVT::v16i1);
unsigned MaxSift = rc->getSize()*8 - 1;
Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
@@ -11905,7 +11905,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_E
if (!isa<ConstantSDNode>(Idx)) {
if (VecVT.is512BitVector() ||
- (VecVT.is256BitVector() && Subtarget->hasInt256() &&
+ (VecVT.is256BitVector() && Subtarget.hasInt256() &&
VecVT.getVectorElementType().getSizeInBits() == 32)) {
MVT MaskEltVT =
@@ -11946,7 +11946,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_E
assert(VecVT.is128BitVector() && "Unexpected vector length");
- if (Subtarget->hasSSE41())
+ if (Subtarget.hasSSE41())
if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
@@ -12060,8 +12060,8 @@ SDValue X86TargetLowering::LowerINSERT_V
// TODO: It is worthwhile to cast integer to floating point and back
// and incur a domain crossing penalty if that's what we'll end up
// doing anyway after extracting to a 128-bit vector.
- if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
- (Subtarget->hasAVX2() && EltVT == MVT::i32)) {
+ if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
+ (Subtarget.hasAVX2() && EltVT == MVT::i32)) {
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
N2 = DAG.getIntPtrConstant(1, dl);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2);
@@ -12085,7 +12085,7 @@ SDValue X86TargetLowering::LowerINSERT_V
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) {
unsigned Opc;
if (VT == MVT::v8i16) {
@@ -12185,7 +12185,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDV
// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
// a simple subregister reference or explicit instructions to grab
// upper bits of a vector.
-static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
@@ -12194,7 +12194,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SD
MVT ResVT = Op.getSimpleValueType();
MVT InVT = In.getSimpleValueType();
- if (Subtarget->hasFp256()) {
+ if (Subtarget.hasFp256()) {
if (ResVT.is128BitVector() &&
(InVT.is256BitVector() || InVT.is512BitVector()) &&
isa<ConstantSDNode>(Idx)) {
@@ -12211,9 +12211,9 @@ static SDValue LowerEXTRACT_SUBVECTOR(SD
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
// simple superregister reference or explicit instructions to insert
// the upper bits of a vector.
-static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- if (!Subtarget->hasAVX())
+ if (!Subtarget.hasAVX())
return SDValue();
SDLoc dl(Op);
@@ -12246,7 +12246,7 @@ static SDValue LowerINSERT_SUBVECTOR(SDV
bool Fast;
unsigned Alignment = FirstLd->getAlignment();
unsigned AS = FirstLd->getAddressSpace();
- const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+ const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
OpVT, AS, Alignment, &Fast) && Fast) {
SDValue Ops[] = { SubVec2, SubVec };
@@ -12286,12 +12286,12 @@ X86TargetLowering::LowerConstantPool(SDV
unsigned WrapperKind = X86ISD::Wrapper;
CodeModel::Model M = DAG.getTarget().getCodeModel();
- if (Subtarget->isPICStyleRIPRel() &&
+ if (Subtarget.isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
WrapperKind = X86ISD::WrapperRIP;
- else if (Subtarget->isPICStyleGOT())
+ else if (Subtarget.isPICStyleGOT())
OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleStubPIC())
+ else if (Subtarget.isPICStyleStubPIC())
OpFlag = X86II::MO_PIC_BASE_OFFSET;
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -12318,12 +12318,12 @@ SDValue X86TargetLowering::LowerJumpTabl
unsigned WrapperKind = X86ISD::Wrapper;
CodeModel::Model M = DAG.getTarget().getCodeModel();
- if (Subtarget->isPICStyleRIPRel() &&
+ if (Subtarget.isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
WrapperKind = X86ISD::WrapperRIP;
- else if (Subtarget->isPICStyleGOT())
+ else if (Subtarget.isPICStyleGOT())
OpFlag = X86II::MO_GOTOFF;
- else if (Subtarget->isPICStyleStubPIC())
+ else if (Subtarget.isPICStyleStubPIC())
OpFlag = X86II::MO_PIC_BASE_OFFSET;
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -12350,16 +12350,16 @@ X86TargetLowering::LowerExternalSymbol(S
unsigned WrapperKind = X86ISD::Wrapper;
CodeModel::Model M = DAG.getTarget().getCodeModel();
- if (Subtarget->isPICStyleRIPRel() &&
+ if (Subtarget.isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel)) {
- if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF())
+ if (Subtarget.isTargetDarwin() || Subtarget.isTargetELF())
OpFlag = X86II::MO_GOTPCREL;
WrapperKind = X86ISD::WrapperRIP;
- } else if (Subtarget->isPICStyleGOT()) {
+ } else if (Subtarget.isPICStyleGOT()) {
OpFlag = X86II::MO_GOT;
- } else if (Subtarget->isPICStyleStubPIC()) {
+ } else if (Subtarget.isPICStyleStubPIC()) {
OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE;
- } else if (Subtarget->isPICStyleStubNoDynamic()) {
+ } else if (Subtarget.isPICStyleStubNoDynamic()) {
OpFlag = X86II::MO_DARWIN_NONLAZY;
}
@@ -12371,7 +12371,7 @@ X86TargetLowering::LowerExternalSymbol(S
// With PIC, the address is actually $g + Offset.
if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
- !Subtarget->is64Bit()) {
+ !Subtarget.is64Bit()) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
@@ -12391,7 +12391,7 @@ SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
unsigned char OpFlags =
- Subtarget->ClassifyBlockAddressReference();
+ Subtarget.ClassifyBlockAddressReference();
CodeModel::Model M = DAG.getTarget().getCodeModel();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
@@ -12399,7 +12399,7 @@ X86TargetLowering::LowerBlockAddress(SDV
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
- if (Subtarget->isPICStyleRIPRel() &&
+ if (Subtarget.isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
else
@@ -12420,7 +12420,7 @@ X86TargetLowering::LowerGlobalAddress(co
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
- Subtarget->ClassifyGlobalReference(GV, DAG.getTarget());
+ Subtarget.ClassifyGlobalReference(GV, DAG.getTarget());
CodeModel::Model M = DAG.getTarget().getCodeModel();
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
@@ -12433,7 +12433,7 @@ X86TargetLowering::LowerGlobalAddress(co
Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
}
- if (Subtarget->isPICStyleRIPRel() &&
+ if (Subtarget.isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
else
@@ -12627,35 +12627,35 @@ X86TargetLowering::LowerGlobalTLSAddress
const GlobalValue *GV = GA->getGlobal();
auto PtrVT = getPointerTy(DAG.getDataLayout());
- if (Subtarget->isTargetELF()) {
+ if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
case TLSModel::LocalDynamic:
return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT,
- Subtarget->is64Bit());
+ Subtarget.is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget->is64Bit(),
+ return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
DAG.getTarget().getRelocationModel() ==
Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget.isTargetDarwin()) {
// Darwin only has one model of TLS. Lower to that.
unsigned char OpFlag = 0;
- unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
+ unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) &&
- !Subtarget->is64Bit();
+ !Subtarget.is64Bit();
if (PIC32)
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
@@ -12689,12 +12689,12 @@ X86TargetLowering::LowerGlobalTLSAddress
// And our return value (tls address) is in the standard call return value
// location.
- unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
}
- if (Subtarget->isTargetKnownWindowsMSVC() ||
- Subtarget->isTargetWindowsGNU()) {
+ if (Subtarget.isTargetKnownWindowsMSVC() ||
+ Subtarget.isTargetWindowsGNU()) {
// Just use the implicit TLS architecture
// Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -12712,15 +12712,15 @@ X86TargetLowering::LowerGlobalTLSAddress
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
// %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
// use its literal value of 0x2C.
- Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
+ Value *Ptr = Constant::getNullValue(Subtarget.is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue TlsArray = Subtarget->is64Bit()
+ SDValue TlsArray = Subtarget.is64Bit()
? DAG.getIntPtrConstant(0x58, dl)
- : (Subtarget->isTargetWindowsGNU()
+ : (Subtarget.isTargetWindowsGNU()
? DAG.getIntPtrConstant(0x2C, dl)
: DAG.getExternalSymbol("_tls_array", PtrVT));
@@ -12734,7 +12734,7 @@ X86TargetLowering::LowerGlobalTLSAddress
} else {
// Load the _tls_index variable
SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
- if (Subtarget->is64Bit())
+ if (Subtarget.is64Bit())
IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX,
MachinePointerInfo(), MVT::i32, false, false,
false, 0);
@@ -12850,13 +12850,13 @@ SDValue X86TargetLowering::LowerSINT_TO_
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return Op;
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
- Subtarget->is64Bit()) {
+ Subtarget.is64Bit()) {
return Op;
}
SDValue ValueToStore = Op.getOperand(0);
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
- !Subtarget->is64Bit())
+ !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
@@ -12989,7 +12989,7 @@ SDValue X86TargetLowering::LowerUINT_TO_
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
- if (Subtarget->hasSSE3()) {
+ if (Subtarget.hasSSE3()) {
// FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
@@ -13184,10 +13184,10 @@ SDValue X86TargetLowering::lowerUINT_TO_
}
case MVT::v4i32:
case MVT::v8i32:
- return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget);
+ return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget);
case MVT::v16i8:
case MVT::v16i16:
- assert(Subtarget->hasAVX512());
+ assert(Subtarget.hasAVX512());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
}
@@ -13211,8 +13211,8 @@ SDValue X86TargetLowering::LowerUINT_TO_
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
- if (Subtarget->hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
- (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget->is64Bit()))) {
+ if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
+ (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
// Conversions from unsigned i32 to f32/f64 are legal,
// using VCVTUSI2SS/SD. Same for i64 in 64-bit mode.
return Op;
@@ -13222,7 +13222,7 @@ SDValue X86TargetLowering::LowerUINT_TO_
return LowerUINT_TO_FP_i64(Op, DAG);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
- if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
+ if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
@@ -13242,7 +13242,7 @@ SDValue X86TargetLowering::LowerUINT_TO_
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue ValueToStore = Op.getOperand(0);
- if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit())
+ if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
@@ -13325,10 +13325,10 @@ X86TargetLowering::FP_TO_INTHelper(SDVal
// used for the 32-bit subtarget, but also for f80 on a 64-bit target.
bool UnsignedFixup = !IsSigned &&
DstTy == MVT::i64 &&
- (!Subtarget->is64Bit() ||
+ (!Subtarget.is64Bit() ||
!isScalarFPTypeInSSEReg(TheVT));
- if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
+ if (!IsSigned && DstTy != MVT::i64 && !Subtarget.hasAVX512()) {
// Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
// The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
@@ -13343,7 +13343,7 @@ X86TargetLowering::FP_TO_INTHelper(SDVal
if (DstTy == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
- if (Subtarget->is64Bit() &&
+ if (Subtarget.is64Bit() &&
DstTy == MVT::i64 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
@@ -13459,7 +13459,7 @@ X86TargetLowering::FP_TO_INTHelper(SDVal
false, false, false, 0);
High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
// Join High32 and Low32 into a 64-bit result.
// (High32 << 32) | Low32
Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
@@ -13486,7 +13486,7 @@ X86TargetLowering::FP_TO_INTHelper(SDVal
}
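FP_TO_INTHelper above recombines the two 32-bit pieces with (High32 << 32) | Low32 when Subtarget.is64Bit(); the same arithmetic in plain integers:

#include <cstdint>

// High32 is zero-extended, shifted into the upper half, then OR'd with Low32.
static uint64_t joinHalves(uint32_t High32, uint32_t Low32) {
  return (static_cast<uint64_t>(High32) << 32) | Low32;
}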
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
@@ -13513,7 +13513,7 @@ static SDValue LowerAVXExtend(SDValue Op
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
return SDValue();
- if (Subtarget->hasInt256())
+ if (Subtarget.hasInt256())
return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
@@ -13532,13 +13532,13 @@ static SDValue LowerAVXExtend(SDValue Op
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
unsigned int NumElts = VT.getVectorNumElements();
- if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
+ if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
@@ -13557,16 +13557,16 @@ static SDValue LowerZERO_EXTEND_AVX512(
return DAG.getNode(X86ISD::VTRUNC, DL, VT, V);
}
-static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasFp256())
+ if (Subtarget.hasFp256())
if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
return SDValue();
}
-static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
@@ -13576,7 +13576,7 @@ static SDValue LowerZERO_EXTEND(SDValue
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
- if (Subtarget->hasFp256())
+ if (Subtarget.hasFp256())
if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
@@ -13586,7 +13586,7 @@ static SDValue LowerZERO_EXTEND(SDValue
}
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
@@ -13598,10 +13598,10 @@ static SDValue LowerTruncateVecI1(SDValu
// Shift LSB to MSB and use VPMOVB2M - SKX.
unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
- Subtarget->hasBWI()) || // legal, will go to VPMOVB2M, VPMOVW2M
+ Subtarget.hasBWI()) || // legal, will go to VPMOVB2M, VPMOVW2M
((InVT.is256BitVector() || InVT.is128BitVector()) &&
- InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() &&
- Subtarget->hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M
+ InVT.getScalarSizeInBits() <= 16 && Subtarget.hasBWI() &&
+ Subtarget.hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M
// Shift packed bytes not supported natively, bitcast to dword
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
@@ -13611,10 +13611,10 @@ static SDValue LowerTruncateVecI1(SDValu
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
}
if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
- Subtarget->hasDQI()) || // legal, will go to VPMOVD2M, VPMOVQ2M
+ Subtarget.hasDQI()) || // legal, will go to VPMOVD2M, VPMOVQ2M
((InVT.is256BitVector() || InVT.is128BitVector()) &&
- InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() &&
- Subtarget->hasVLX())) { // legal, will go to VPMOVD2M, VPMOVQ2M
+ InVT.getScalarSizeInBits() >= 32 && Subtarget.hasDQI() &&
+ Subtarget.hasVLX())) { // legal, will go to VPMOVD2M, VPMOVQ2M
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
@@ -13625,7 +13625,7 @@ static SDValue LowerTruncateVecI1(SDValu
unsigned NumElts = InVT.getVectorNumElements();
if (InVT.getSizeInBits() < 512 &&
(InVT.getScalarType() == MVT::i8 || InVT.getScalarType() == MVT::i16 ||
- !Subtarget->hasVLX())) {
+ !Subtarget.hasVLX())) {
assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type.");
// TESTD/Q should be used (if BW supported we use CVT2MASK above),
@@ -13662,16 +13662,16 @@ SDValue X86TargetLowering::LowerTRUNCATE
return LowerTruncateVecI1(Op, DAG, Subtarget);
// vpmovqb/w/d, vpmovdb/w, vpmovwb
- if (Subtarget->hasAVX512()) {
+ if (Subtarget.hasAVX512()) {
// word to byte only under BWI
- if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8
+ if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8
return DAG.getNode(X86ISD::VTRUNC, DL, VT,
DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
}
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
- if (Subtarget->hasInt256()) {
+ if (Subtarget.hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
In = DAG.getBitcast(MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
@@ -13692,7 +13692,7 @@ SDValue X86TargetLowering::LowerTRUNCATE
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
- if (Subtarget->hasInt256()) {
+ if (Subtarget.hasInt256()) {
In = DAG.getBitcast(MVT::v32i8, In);
SmallVector<SDValue,32> pshufbMask;
@@ -13750,7 +13750,7 @@ SDValue X86TargetLowering::LowerTRUNCATE
if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
- assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
+ assert(Subtarget.hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2);
@@ -13998,11 +13998,11 @@ static SDValue LowerFGETSIGN(SDValue Op,
}
// Check whether an OR'd tree is PTEST-able.
-static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
- if (!Subtarget->hasSSE41())
+ if (!Subtarget.hasSSE41())
return SDValue();
if (!Op->hasOneUse())
@@ -14210,14 +14210,14 @@ SDValue X86TargetLowering::EmitTest(SDVa
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
- if (C->isOne() && !Subtarget->slowIncDec()) {
+ if (C->isOne() && !Subtarget.slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
- if (C->isAllOnesValue() && !Subtarget->slowIncDec()) {
+ if (C->isAllOnesValue() && !Subtarget.slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
@@ -14360,7 +14360,7 @@ SDValue X86TargetLowering::EmitCmp(SDVal
// of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
!DAG.getMachineFunction().getFunction()->optForMinSize() &&
- !Subtarget->isAtom()) {
+ !Subtarget.isAtom()) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
@@ -14380,7 +14380,7 @@ SDValue X86TargetLowering::ConvertCmpIfN
SelectionDAG &DAG) const {
// If the subtarget does not support the FUCOMI instruction, floating-point
// comparisons have to be converted.
- if (Subtarget->hasCMov() ||
+ if (Subtarget.hasCMov() ||
Cmp.getOpcode() != X86ISD::CMP ||
!Cmp.getOperand(0).getValueType().isFloatingPoint() ||
!Cmp.getOperand(1).getValueType().isFloatingPoint())
@@ -14398,7 +14398,7 @@ SDValue X86TargetLowering::ConvertCmpIfN
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
// Some 64-bit targets lack SAHF support, but they do support FCOMI.
- assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
+ assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
@@ -14418,10 +14418,10 @@ SDValue X86TargetLowering::getRsqrtEstim
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget->hasSSE1())
+ if (VT == MVT::f32 && Subtarget.hasSSE1())
RecipOp = "sqrtf";
- else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget->hasAVX()))
+ else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX()))
RecipOp = "vec-sqrtf";
else
return SDValue();
@@ -14450,10 +14450,10 @@ SDValue X86TargetLowering::getRecipEstim
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget->hasSSE1())
+ if (VT == MVT::f32 && Subtarget.hasSSE1())
RecipOp = "divf";
- else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget->hasAVX()))
+ else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX()))
RecipOp = "vec-divf";
else
return SDValue();
@@ -14665,7 +14665,7 @@ static SDValue LowerBoolVSETCC_AVX512(SD
}
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
@@ -14734,7 +14734,7 @@ static SDValue ChangeVSETULTtoVSETULE(SD
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1);
}
-static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -14752,7 +14752,7 @@ static SDValue LowerVSETCC(SDValue Op, c
unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
unsigned Opc = X86ISD::CMPP;
- if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) {
+ if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16);
Opc = X86ISD::CMPM;
}
@@ -14804,11 +14804,11 @@ static SDValue LowerVSETCC(SDValue Op, c
// The non-AVX512 code below works under the assumption that source and
// destination types are the same.
- assert((Subtarget->hasAVX512() || (VT == VTOp0)) &&
+ assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
"Value types for source and destination must be the same!");
// Break 256-bit integer vector compare into smaller ones.
- if (VT.is256BitVector() && !Subtarget->hasInt256())
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntVSETCC(Op, DAG);
MVT OpVT = Op1.getSimpleValueType();
@@ -14816,9 +14816,9 @@ static SDValue LowerVSETCC(SDValue Op, c
return LowerBoolVSETCC_AVX512(Op, DAG);
bool MaskResult = (VT.getVectorElementType() == MVT::i1);
- if (Subtarget->hasAVX512()) {
+ if (Subtarget.hasAVX512()) {
if (Op1.getSimpleValueType().is512BitVector() ||
- (Subtarget->hasBWI() && Subtarget->hasVLX()) ||
+ (Subtarget.hasBWI() && Subtarget.hasVLX()) ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
@@ -14835,7 +14835,7 @@ static SDValue LowerVSETCC(SDValue Op, c
// Lower using XOP integer comparisons.
if ((VT == MVT::v16i8 || VT == MVT::v8i16 ||
- VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) {
+ VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) {
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
switch (SetCCOpcode) {
@@ -14887,8 +14887,8 @@ static SDValue LowerVSETCC(SDValue Op, c
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
bool hasMinMax =
- (Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
- || (Subtarget->hasSSE2() && (VET == MVT::i8));
+ (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
+ || (Subtarget.hasSSE2() && (VET == MVT::i8));
if (hasMinMax) {
switch (SetCCOpcode) {
@@ -14900,7 +14900,7 @@ static SDValue LowerVSETCC(SDValue Op, c
if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
}
- bool hasSubus = Subtarget->hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
+ bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
if (!MinMax && hasSubus) {
// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
// Op0 u<= Op1:
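
The comment above starts to describe the PSUBUS[BW] special case. A hedged scalar model of the identity it presumably relies on: unsigned saturating subtraction clamps to zero exactly when the first operand is unsigned-less-or-equal to the second, so "Op0 u<= Op1" can be tested as "subus(Op0, Op1) == 0".

// Hedged scalar model (not the DAG code): unsigned saturating subtraction,
// as PSUBUSB does per byte, clamps to zero exactly when X u<= Y.
#include <cstdint>
#include <cassert>

static uint8_t subus8(uint8_t X, uint8_t Y) {
  return X > Y ? uint8_t(X - Y) : uint8_t(0);
}

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      assert((subus8(uint8_t(X), uint8_t(Y)) == 0) == (X <= Y));
  return 0;
}
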
@@ -14914,7 +14914,7 @@ static SDValue LowerVSETCC(SDValue Op, c
// beneficial because the constant in the register is no longer
// destructed as the destination so it can be hoisted out of a loop.
// Only do this pre-AVX since vpcmp* is no longer destructive.
- if (Subtarget->hasAVX())
+ if (Subtarget.hasAVX())
break;
SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG);
if (ULEOp1.getNode()) {
@@ -14940,8 +14940,8 @@ static SDValue LowerVSETCC(SDValue Op, c
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
if (VT == MVT::v2i64) {
- if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
- assert(Subtarget->hasSSE2() && "Don't know how to lower!");
+ if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
+ assert(Subtarget.hasSSE2() && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
@@ -14982,10 +14982,10 @@ static SDValue LowerVSETCC(SDValue Op, c
return DAG.getBitcast(VT, Result);
}
- if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
+ if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) {
// If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
// pcmpeqd + pshufd + pand.
- assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
+ assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
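
The hunk above synthesizes a 64-bit lane equality from 32-bit compares (pcmpeqd + pshufd + pand). A scalar model of the idea, with the lane pairing done by pshufd abstracted away: two 64-bit values are equal exactly when both of their 32-bit halves are, and the final pand corresponds to the logical AND of the per-half results.

// Scalar model of the synthesis above (not the DAG code).
#include <cstdint>
#include <cassert>

static bool eq64ViaHalves(uint64_t A, uint64_t B) {
  bool LoEq = uint32_t(A) == uint32_t(B);           // pcmpeqd, low halves
  bool HiEq = uint32_t(A >> 32) == uint32_t(B >> 32); // pcmpeqd, high halves
  return LoEq && HiEq;                              // pand of the two masks
}

int main() {
  assert(eq64ViaHalves(0x0000000100000002ULL, 0x0000000100000002ULL));
  assert(!eq64ViaHalves(0x0000000100000002ULL, 0x0000000100000003ULL));
  assert(!eq64ViaHalves(0x0000000200000002ULL, 0x0000000100000002ULL));
  return 0;
}
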
@@ -15038,7 +15038,7 @@ SDValue X86TargetLowering::LowerSETCC(SD
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
- assert(((!Subtarget->hasAVX512() && VT == MVT::i8) || (VT == MVT::i1))
+ assert(((!Subtarget.hasAVX512() && VT == MVT::i8) || (VT == MVT::i1))
&& "SetCC type must be 8-bit or 1-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -15168,15 +15168,15 @@ SDValue X86TargetLowering::LowerSELECT(S
// are available or VBLENDV if AVX is available.
// Otherwise FP cmovs get lowered into a less efficient branch sequence later.
if (Cond.getOpcode() == ISD::SETCC &&
- ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
- (Subtarget->hasSSE1() && VT == MVT::f32)) &&
+ ((Subtarget.hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
+ (Subtarget.hasSSE1() && VT == MVT::f32)) &&
VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
int SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
if (SSECC != 8) {
- if (Subtarget->hasAVX512()) {
+ if (Subtarget.hasAVX512()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
DAG.getConstant(SSECC, DL, MVT::i8));
return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
@@ -15198,7 +15198,7 @@ SDValue X86TargetLowering::LowerSELECT(S
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
- if (Subtarget->hasAVX() &&
+ if (Subtarget.hasAVX() &&
!isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
// Convert to vectors, do a VSELECT, and convert back to scalar.
@@ -15438,7 +15438,7 @@ SDValue X86TargetLowering::LowerSELECT(S
}
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
@@ -15449,22 +15449,22 @@ static SDValue LowerSIGN_EXTEND_AVX512(S
// SKX processor
if ((InVTElt == MVT::i1) &&
- (((Subtarget->hasBWI() && Subtarget->hasVLX() &&
+ (((Subtarget.hasBWI() && Subtarget.hasVLX() &&
VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) ||
- ((Subtarget->hasBWI() && VT.is512BitVector() &&
+ ((Subtarget.hasBWI() && VT.is512BitVector() &&
VTElt.getSizeInBits() <= 16)) ||
- ((Subtarget->hasDQI() && Subtarget->hasVLX() &&
+ ((Subtarget.hasDQI() && Subtarget.hasVLX() &&
VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
- ((Subtarget->hasDQI() && VT.is512BitVector() &&
+ ((Subtarget.hasDQI() && VT.is512BitVector() &&
VTElt.getSizeInBits() >= 32))))
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
unsigned int NumElts = VT.getVectorNumElements();
- if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
+ if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
@@ -15488,7 +15488,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(S
}
static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = Op->getOperand(0);
MVT VT = Op->getSimpleValueType(0);
@@ -15506,7 +15506,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_I
SDLoc dl(Op);
// SSE41 targets can use the pmovsx* instructions directly.
- if (Subtarget->hasSSE41())
+ if (Subtarget.hasSSE41())
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
// pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
@@ -15543,7 +15543,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_I
return SDValue();
}
-static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
@@ -15558,7 +15558,7 @@ static SDValue LowerSIGN_EXTEND(SDValue
(VT != MVT::v16i16 || InVT != MVT::v16i8))
return SDValue();
- if (Subtarget->hasInt256())
+ if (Subtarget.hasInt256())
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
// Optimize vectors in AVX mode
@@ -15601,7 +15601,7 @@ static SDValue LowerSIGN_EXTEND(SDValue
// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
// the shuffle phase or after the shuffle.
-static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
assert(RegVT.isVector() && "We only custom lower vector sext loads.");
@@ -15609,7 +15609,7 @@ static SDValue LowerExtendedLoad(SDValue
"We only custom lower integer vector sext loads.");
// Nothing useful we can do without SSE2 shuffles.
- assert(Subtarget->hasSSE2() && "We only custom lower sext loads with SSE2.");
+ assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
@@ -15628,7 +15628,7 @@ static SDValue LowerExtendedLoad(SDValue
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
- if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256()) {
+ if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) {
// The only way in which we have a legal 256-bit vector result but not the
// integer 256-bit operations needed to directly lower a sextload is if we
// have AVX1 but not AVX2. In that case, we can always emit a sextload to
@@ -15751,7 +15751,7 @@ static SDValue LowerExtendedLoad(SDValue
if (Ext == ISD::SEXTLOAD) {
// If we have SSE4.1, we can directly emit a VSEXT node.
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Sext;
@@ -16087,7 +16087,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
- bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) ||
+ bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
SplitStack;
SDLoc dl(Op);
@@ -16102,7 +16102,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
- bool Is64Bit = Subtarget->is64Bit();
+ bool Is64Bit = Subtarget.is64Bit();
MVT SPTy = getPointerTy(DAG.getDataLayout());
SDValue Result;
@@ -16117,7 +16117,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
@@ -16146,7 +16146,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
DAG.getRegister(Vreg, SPTy));
} else {
SDValue Flag;
- const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
+ const unsigned Reg = (Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX);
Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
Flag = Chain.getValue(1);
@@ -16154,7 +16154,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
@@ -16183,8 +16183,8 @@ SDValue X86TargetLowering::LowerVASTART(
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDLoc DL(Op);
- if (!Subtarget->is64Bit() ||
- Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) {
+ if (!Subtarget.is64Bit() ||
+ Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv())) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -16224,21 +16224,21 @@ SDValue X86TargetLowering::LowerVASTART(
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
- Subtarget->isTarget64BitLP64() ? 8 : 4, DL));
+ Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo(
- SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0);
+ SV, Subtarget.isTarget64BitLP64() ? 16 : 12), false, false, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget->is64Bit() &&
+ assert(Subtarget.is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
assert(Op.getNode()->getNumOperands() == 4);
MachineFunction &MF = DAG.getMachineFunction();
- if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
+ if (Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()))
// The Win64 ABI uses char* instead of a structure.
return DAG.expandVAArg(Op.getNode());
@@ -16268,9 +16268,9 @@ SDValue X86TargetLowering::LowerVAARG(SD
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!Subtarget->useSoftFloat() &&
+ assert(!Subtarget.useSoftFloat() &&
!(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) &&
- Subtarget->hasSSE1());
+ Subtarget.hasSSE1());
}
// Insert VAARG_64 node into the DAG
@@ -16296,12 +16296,12 @@ SDValue X86TargetLowering::LowerVAARG(SD
false, false, false, 0);
}
-static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
// where a va_list is still an i8*.
- assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
- if (Subtarget->isCallingConvWin64(
+ assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
+ if (Subtarget.isCallingConvWin64(
DAG.getMachineFunction().getFunction()->getCallingConv()))
// Probably a Win64 va_copy.
return DAG.expandVACopy(Op.getNode());
@@ -16424,7 +16424,7 @@ static SDValue getTargetVShiftNode(unsig
// Let the shuffle legalizer expand this shift amount node.
SDValue Op0 = ShAmt.getOperand(0);
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
- ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, &Subtarget, DAG);
+ ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG);
} else {
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
@@ -16452,7 +16452,7 @@ static SDValue getTargetVShiftNode(unsig
/// \brief Return Mask with the necessary casting or extending
/// for \p Mask according to \p MaskVT when lowering masking intrinsics
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDLoc dl) {
if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
@@ -16461,9 +16461,9 @@ static SDValue getMaskNode(SDValue Mask,
MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
}
- if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) {
+ if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
if (MaskVT == MVT::v64i1) {
- assert(Subtarget->hasBWI() && "Expected AVX512BW target!");
+ assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
// In case 32bit mode, bitcast i64 is illegal, extend/split it.
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
@@ -16499,7 +16499,7 @@ static SDValue getMaskNode(SDValue Mask,
/// necessary casting or extending for \p Mask when lowering masking intrinsics
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
@@ -16544,7 +16544,7 @@ static SDValue getVectorMaskingNode(SDVa
/// for a scalar instruction.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (isAllOnesConstant(Mask))
return Op;
@@ -16626,7 +16626,7 @@ static SDValue recoverFramePointer(Selec
return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}
-static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -17352,7 +17352,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
// Returns one of the stack, base, or frame pointer registers, depending on
// which is used to reference local variables.
MachineFunction &MF = DAG.getMachineFunction();
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned Reg;
if (RegInfo->hasBasePointer(MF))
Reg = RegInfo->getBaseRegister();
@@ -17366,7 +17366,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
- const X86Subtarget * Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = cast<ConstantSDNode>(ScaleOp);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
@@ -17452,7 +17452,7 @@ static SDValue getPrefetchNode(unsigned
// getReadPerformanceCounter - Handles the lowering of builtin intrinsics that
// read performance monitor counters (x86_rdpmc).
static void getReadPerformanceCounter(SDNode *N, SDLoc DL,
- SelectionDAG &DAG, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -17466,7 +17466,7 @@ static void getReadPerformanceCounter(SD
// Reads the content of a 64-bit performance counter and returns it in the
// registers EDX:EAX.
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
@@ -17477,7 +17477,7 @@ static void getReadPerformanceCounter(SD
}
Chain = HI.getValue(1);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
// The EAX register is loaded with the low-order 32 bits. The EDX register
// is loaded with the supported high-order bits of the counter.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
@@ -17498,7 +17498,7 @@ static void getReadPerformanceCounter(SD
// read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is
// also used to custom lower READCYCLECOUNTER nodes.
static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
- SelectionDAG &DAG, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
@@ -17507,7 +17507,7 @@ static void getReadTimeStampCounter(SDNo
// The processor's time-stamp counter (a 64-bit MSR) is stored into the
// EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
// and the EAX register is loaded with the low-order 32 bits.
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
@@ -17531,7 +17531,7 @@ static void getReadTimeStampCounter(SDNo
MachinePointerInfo(), false, false, 0);
}
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
// The EDX register is loaded with the high-order 32 bits of the MSR, and
// the EAX register is loaded with the low-order 32 bits.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
@@ -17548,7 +17548,7 @@ static void getReadTimeStampCounter(SDNo
Results.push_back(Chain);
}
-static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<SDValue, 2> Results;
SDLoc DL(Op);
@@ -17575,7 +17575,7 @@ static SDValue MarkEHRegistrationNode(SD
return Chain;
}
-static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -17832,7 +17832,7 @@ SDValue X86TargetLowering::LowerRETURNAD
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT,
@@ -17850,7 +17850,7 @@ SDValue X86TargetLowering::LowerFRAMEADD
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
EVT VT = Op.getValueType();
MFI->setFrameAddressIsTaken(true);
@@ -17889,7 +17889,7 @@ SDValue X86TargetLowering::LowerFRAMEADD
// this table could be generated automatically from RegInfo.
unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
- const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const MachineFunction &MF = DAG.getMachineFunction();
unsigned Reg = StringSwitch<unsigned>(RegName)
@@ -17905,7 +17905,7 @@ unsigned X86TargetLowering::getRegisterB
" is allocatable: function has no frame pointer");
#ifndef NDEBUG
else {
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
@@ -17922,23 +17922,23 @@ unsigned X86TargetLowering::getRegisterB
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
}
unsigned X86TargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR)
- return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX;
+ return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
- return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
+ return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX;
}
unsigned X86TargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
// Funclet personalities don't use selectors (the runtime does the selection).
assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)));
- return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX;
+ return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
@@ -17948,7 +17948,7 @@ SDValue X86TargetLowering::LowerEH_RETUR
SDLoc dl (Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
@@ -17996,9 +17996,9 @@ SDValue X86TargetLowering::LowerINIT_TRA
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
SDValue OutChains[6];
// Large code-model.
@@ -18161,7 +18161,7 @@ SDValue X86TargetLowering::LowerFLT_ROUN
*/
MachineFunction &MF = DAG.getMachineFunction();
- const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
@@ -18266,14 +18266,14 @@ static SDValue LowerVectorCTLZ_AVX512(SD
return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
}
-static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
- if (VT.isVector() && Subtarget->hasAVX512())
+ if (VT.isVector() && Subtarget.hasAVX512())
return LowerVectorCTLZ_AVX512(Op, DAG);
Op = Op.getOperand(0);
@@ -18305,7 +18305,7 @@ static SDValue LowerCTLZ(SDValue Op, con
return Op;
}
-static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
EVT OpVT = VT;
@@ -18434,7 +18434,7 @@ static SDValue LowerMINMAX(SDValue Op, S
return Lower256IntArith(Op, DAG);
}
-static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
@@ -18443,7 +18443,7 @@ static SDValue LowerMUL(SDValue Op, cons
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget->hasInt256())
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntArith(Op, DAG);
SDValue A = Op.getOperand(0);
@@ -18452,7 +18452,7 @@ static SDValue LowerMUL(SDValue Op, cons
// Lower v16i8/v32i8 mul as promotion to v8i16/v16i16 vector
// pairs, multiply and truncate.
if (VT == MVT::v16i8 || VT == MVT::v32i8) {
- if (Subtarget->hasInt256()) {
+ if (Subtarget.hasInt256()) {
if (VT == MVT::v32i8) {
MVT SubVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() / 2);
SDValue Lo = DAG.getIntPtrConstant(0, dl);
@@ -18480,7 +18480,7 @@ static SDValue LowerMUL(SDValue Op, cons
// Extract the lo parts and sign extend to i16
SDValue ALo, BLo;
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A);
BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B);
} else {
@@ -18496,7 +18496,7 @@ static SDValue LowerMUL(SDValue Op, cons
// Extract the hi parts and sign extend to i16
SDValue AHi, BHi;
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
@@ -18524,7 +18524,7 @@ static SDValue LowerMUL(SDValue Op, cons
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
if (VT == MVT::v4i32) {
- assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
+ assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
"Should not custom lower when pmuldq is available!");
// Extract the odd parts.
@@ -18589,7 +18589,7 @@ static SDValue LowerMUL(SDValue Op, cons
}
SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget->isTargetWin64() && "Unexpected target");
+ assert(Subtarget.isTargetWin64() && "Unexpected target");
EVT VT = Op.getValueType();
assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
"Unexpected return type for lowering");
@@ -18640,14 +18640,14 @@ SDValue X86TargetLowering::LowerWin64_i1
return DAG.getBitcast(VT, CallInfo.first);
}
-static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
MVT VT = Op0.getSimpleValueType();
SDLoc dl(Op);
- assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
- (VT == MVT::v8i32 && Subtarget->hasInt256()));
+ assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
+ (VT == MVT::v8i32 && Subtarget.hasInt256()));
// PMULxD operations multiply each even value (starting at 0) of LHS with
// the related value of RHS and produce a widened result.
@@ -18672,7 +18672,7 @@ static SDValue LowerMUL_LOHI(SDValue Op,
MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
unsigned Opcode =
- (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
+ (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
// PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
@@ -18696,7 +18696,7 @@ static SDValue LowerMUL_LOHI(SDValue Op,
// If we have a signed multiply but no PMULDQ, fix up the high parts of an
// unsigned multiply.
- if (IsSigned && !Subtarget->hasSSE41()) {
+ if (IsSigned && !Subtarget.hasSSE41()) {
SDValue ShAmt = DAG.getConstant(
31, dl,
DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
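
The hunk above fixes up a signed multiply-high when only the unsigned form is available, using a 31-bit arithmetic shift to build per-operand correction masks. A minimal scalar sketch of the underlying identity for 32x32->64 multiplies (the helper and formula below are an illustration, not lifted from the patch): mulhs(a, b) = mulhu(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0).

// Illustrative scalar sketch, not the DAG lowering.
#include <cstdint>
#include <cassert>

static int32_t mulhsViaMulhu(int32_t A, int32_t B) {
  uint64_t UnsignedProd = uint64_t(uint32_t(A)) * uint64_t(uint32_t(B));
  uint32_t Hi = uint32_t(UnsignedProd >> 32); // mulhu(a, b)
  // Treating a negative input as unsigned over-counts the product by
  // (other operand) << 32, so subtract it back from the high half.
  Hi -= (A < 0) ? uint32_t(B) : 0u;
  Hi -= (B < 0) ? uint32_t(A) : 0u;
  return int32_t(Hi);
}

int main() {
  const int32_t Vals[] = {INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX};
  for (int32_t A : Vals)
    for (int32_t B : Vals)
      // Reference result; '>>' on a negative signed value is assumed to be an
      // arithmetic shift (true on mainstream compilers, guaranteed in C++20).
      assert(mulhsViaMulhu(A, B) == int32_t((int64_t(A) * int64_t(B)) >> 32));
  return 0;
}
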
@@ -18717,19 +18717,19 @@ static SDValue LowerMUL_LOHI(SDValue Op,
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
-static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
+static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (VT.getScalarSizeInBits() < 16)
return false;
if (VT.is512BitVector() &&
- (VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI()))
+ (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
bool LShift = VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget->hasInt256());
+ (VT.is256BitVector() && Subtarget.hasInt256());
- bool AShift = LShift && (Subtarget->hasVLX() ||
+ bool AShift = LShift && (Subtarget.hasVLX() ||
(VT != MVT::v2i64 && VT != MVT::v4i64));
return (Opcode == ISD::SRA) ? AShift : LShift;
}
@@ -18737,24 +18737,24 @@ static bool SupportedVectorShiftWithImm(
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
static
-bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
+bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
-static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
+static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
- if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16)
+ if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
return false;
// vXi16 supported only on AVX-512, BWI
- if (VT.getScalarSizeInBits() == 16 && !Subtarget->hasBWI())
+ if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
return false;
- if (VT.is512BitVector() || Subtarget->hasVLX())
+ if (VT.is512BitVector() || Subtarget.hasVLX())
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
@@ -18763,7 +18763,7 @@ static bool SupportedVectorVarShift(MVT
}
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
@@ -18813,12 +18813,12 @@ static SDValue LowerScalarImmediateShift
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
// i64 SRA needs to be performed as partial shifts.
- if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
- Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP())
+ if ((VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
+ Op.getOpcode() == ISD::SRA && !Subtarget.hasXOP())
return ArithmeticShiftRight64(ShiftAmt);
if (VT == MVT::v16i8 ||
- (Subtarget->hasInt256() && VT == MVT::v32i8) ||
+ (Subtarget.hasInt256() && VT == MVT::v32i8) ||
VT == MVT::v64i8) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
@@ -18834,7 +18834,7 @@ static SDValue LowerScalarImmediateShift
}
// XOP can shift v16i8 directly instead of as shift v8i16 + mask.
- if (VT == MVT::v16i8 && Subtarget->hasXOP())
+ if (VT == MVT::v16i8 && Subtarget.hasXOP())
return SDValue();
if (Op.getOpcode() == ISD::SHL) {
@@ -18870,8 +18870,8 @@ static SDValue LowerScalarImmediateShift
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
- if (!Subtarget->is64Bit() && !Subtarget->hasXOP() &&
- (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) {
+ if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
+ (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64))) {
// Peek through any splat that was introduced for i64 shift vectorization.
int SplatIndex = -1;
@@ -18928,7 +18928,7 @@ static SDValue LowerScalarImmediateShift
}
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget* Subtarget) {
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
@@ -18989,7 +18989,7 @@ static SDValue LowerScalarVariableShift(
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
- if (!Subtarget->is64Bit() && VT == MVT::v2i64 &&
+ if (!Subtarget.is64Bit() && VT == MVT::v2i64 &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
@@ -19010,7 +19010,7 @@ static SDValue LowerScalarVariableShift(
return SDValue();
}
-static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
+static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
@@ -19018,7 +19018,7 @@ static SDValue LowerShift(SDValue Op, co
SDValue Amt = Op.getOperand(1);
assert(VT.isVector() && "Custom lowering only for vector shifts!");
- assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
+ assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
return V;
@@ -19031,7 +19031,7 @@ static SDValue LowerShift(SDValue Op, co
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
- if (Subtarget->hasXOP() &&
+ if (Subtarget.hasXOP() &&
(VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v8i16 || VT == MVT::v16i8)) {
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
@@ -19058,7 +19058,7 @@ static SDValue LowerShift(SDValue Op, co
// i64 vector arithmetic shift can be emulated with the transform:
// M = lshr(SIGN_BIT, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
- if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget->hasInt256())) &&
+ if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
Op.getOpcode() == ISD::SRA) {
SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
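
The comment above gives the transform used to emulate a 64-bit arithmetic shift right with logical operations: M = lshr(SIGN_BIT, Amt); ashr(R, Amt) == sub(xor(lshr(R, Amt), M), M). A standalone scalar check of that identity (the function name is made up):

// Standalone check of the transform in the comment above.
#include <cstdint>
#include <cassert>

static int64_t ashr64ViaLogicalShift(int64_t R, unsigned Amt) {
  assert(Amt < 64 && "shift amount out of range");
  const uint64_t SignBit = UINT64_C(1) << 63;
  uint64_t M = SignBit >> Amt;           // M = lshr(SIGN_BIT, Amt)
  uint64_t Shifted = uint64_t(R) >> Amt; // lshr(R, Amt)
  return int64_t((Shifted ^ M) - M);     // sub(xor(...), M), two's complement wrap
}

int main() {
  const int64_t Vals[] = {INT64_MIN, -12345, -1, 0, 1, 67890, INT64_MAX};
  const unsigned Amts[] = {0, 1, 13, 31, 63};
  for (int64_t R : Vals)
    for (unsigned Amt : Amts)
      // Reference: '>>' on signed values is assumed to be an arithmetic shift
      // (true on mainstream compilers, guaranteed from C++20).
      assert(ashr64ViaLogicalShift(R, Amt) == (R >> Amt));
  return 0;
}
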
@@ -19073,7 +19073,7 @@ static SDValue LowerShift(SDValue Op, co
// Do this only if the vector shift count is a constant build_vector.
if (Op.getOpcode() == ISD::SHL &&
(VT == MVT::v8i16 || VT == MVT::v4i32 ||
- (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
+ (Subtarget.hasInt256() && VT == MVT::v16i16)) &&
ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
SmallVector<SDValue, 8> Elts;
MVT SVT = VT.getVectorElementType();
@@ -19233,14 +19233,14 @@ static SDValue LowerShift(SDValue Op, co
}
if (VT == MVT::v16i8 ||
- (VT == MVT::v32i8 && Subtarget->hasInt256() && !Subtarget->hasXOP())) {
+ (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
// On SSE41 targets we make use of the fact that VSELECT lowers
// to PBLENDVB which selects bytes based just on the sign bit.
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
@@ -19343,7 +19343,7 @@ static SDValue LowerShift(SDValue Op, co
// It's worth extending once and using the v8i32 shifts for 16-bit types, but
// the extra overheads to get from v16i8 to v8i32 make the existing SSE
// solution better.
- if (Subtarget->hasInt256() && VT == MVT::v8i16) {
+ if (Subtarget.hasInt256() && VT == MVT::v8i16) {
MVT ExtVT = MVT::v8i32;
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
@@ -19353,7 +19353,7 @@ static SDValue LowerShift(SDValue Op, co
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
}
- if (Subtarget->hasInt256() && !Subtarget->hasXOP() && VT == MVT::v16i16) {
+ if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
@@ -19377,7 +19377,7 @@ static SDValue LowerShift(SDValue Op, co
auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
// On SSE41 targets we make use of the fact that VSELECT lowers
// to PBLENDVB which selects bytes based just on the sign bit.
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
V0 = DAG.getBitcast(ExtVT, V0);
V1 = DAG.getBitcast(ExtVT, V1);
@@ -19394,7 +19394,7 @@ static SDValue LowerShift(SDValue Op, co
};
// Turn 'a' into a mask suitable for VSELECT: a = a << 12;
- if (Subtarget->hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
// On SSE41 targets we need to replicate the shift mask in both
// bytes for PBLENDVB.
Amt = DAG.getNode(
@@ -19469,7 +19469,7 @@ static SDValue LowerShift(SDValue Op, co
return SDValue();
}
-static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
@@ -19477,7 +19477,7 @@ static SDValue LowerRotate(SDValue Op, c
SDValue Amt = Op.getOperand(1);
assert(VT.isVector() && "Custom lowering only for vector rotates!");
- assert(Subtarget->hasXOP() && "XOP support required for vector rotates!");
+ assert(Subtarget.hasXOP() && "XOP support required for vector rotates!");
assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
@@ -19589,9 +19589,9 @@ bool X86TargetLowering::needsCmpXchgNb(T
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
- return !Subtarget->is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
+ return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
else if (OpWidth == 128)
- return Subtarget->hasCmpxchg16b();
+ return Subtarget.hasCmpxchg16b();
else
return false;
}
@@ -19611,7 +19611,7 @@ X86TargetLowering::shouldExpandAtomicLoa
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
- unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
@@ -19648,7 +19648,7 @@ X86TargetLowering::shouldExpandAtomicRMW
}
}
-static bool hasMFENCE(const X86Subtarget& Subtarget) {
+static bool hasMFENCE(const X86Subtarget &Subtarget) {
// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
// no-sse2). There isn't any reason to disable it if the target processor
// supports it.
@@ -19657,7 +19657,7 @@ static bool hasMFENCE(const X86Subtarget
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
- unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
// Accesses larger than the native width are turned into cmpxchg/libcalls, so
// there is no benefit in turning such RMWs into loads, and it is actually
@@ -19694,7 +19694,7 @@ X86TargetLowering::lowerIdempotentRMWInt
// the IR level, so we must wrap it in an intrinsic.
return nullptr;
- if (!hasMFENCE(*Subtarget))
+ if (!hasMFENCE(Subtarget))
// FIXME: it might make sense to use a locked operation here but on a
// different cache-line to prevent cache-line bouncing. In practice it
// is probably a small win, and x86 processors without mfence are rare
@@ -19714,7 +19714,7 @@ X86TargetLowering::lowerIdempotentRMWInt
return Loaded;
}
-static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
@@ -19725,7 +19725,7 @@ static SDValue LowerATOMIC_FENCE(SDValue
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
- if (hasMFENCE(*Subtarget))
+ if (hasMFENCE(Subtarget))
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
@@ -19747,7 +19747,7 @@ static SDValue LowerATOMIC_FENCE(SDValue
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
-static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT T = Op.getSimpleValueType();
SDLoc DL(Op);
@@ -19759,7 +19759,7 @@ static SDValue LowerCMP_SWAP(SDValue Op,
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
case MVT::i64:
- assert(Subtarget->is64Bit() && "Node not type legal!");
+ assert(Subtarget.is64Bit() && "Node not type legal!");
Reg = X86::RAX; size = 8;
break;
}
@@ -19789,14 +19789,14 @@ static SDValue LowerCMP_SWAP(SDValue Op,
return SDValue();
}
-static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
- assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
if (DstVT != MVT::f64)
// This conversion needs to be expanded.
return SDValue();
@@ -19816,7 +19816,7 @@ static SDValue LowerBITCAST(SDValue Op,
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
DAG.getIntPtrConstant(i, dl)));
} else {
- assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() &&
+ assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
DAG.getIntPtrConstant(0, dl)));
@@ -19835,8 +19835,8 @@ static SDValue LowerBITCAST(SDValue Op,
DAG.getIntPtrConstant(0, dl));
}
- assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
- Subtarget->hasMMX() && "Unexpected custom BITCAST");
+ assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() &&
+ Subtarget.hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
"Unexpected custom BITCAST");
@@ -19859,7 +19859,7 @@ static SDValue LowerBITCAST(SDValue Op,
/// how many bytes of V are summed horizontally to produce each element of the
/// result.
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(V);
MVT ByteVecVT = V.getSimpleValueType();
@@ -19924,7 +19924,7 @@ static SDValue LowerHorizontalByteSum(SD
}
static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
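
LowerVectorCTPOPInRegLUT, per its name and the "fast LUT approach" comment a few hunks below, computes population counts with an in-register lookup table. A hedged scalar model, assuming the usual nibble-at-a-time table lookup (the table and helper are illustrative, not the DAG code):

// Illustrative scalar model, not the DAG lowering: popcount of a byte via a
// 16-entry nibble lookup table (assumption: PSHUFB-style nibble lookups).
#include <cstdint>
#include <cassert>

static const uint8_t NibblePopCount[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                           1, 2, 2, 3, 2, 3, 3, 4};

static unsigned popcountByteLUT(uint8_t V) {
  return NibblePopCount[V & 0xF] + NibblePopCount[V >> 4];
}

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    unsigned Expected = 0;
    for (unsigned B = 0; B < 8; ++B)
      Expected += (V >> B) & 1;
    assert(popcountByteLUT(uint8_t(V)) == Expected);
  }
  return 0;
}
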
@@ -19984,7 +19984,7 @@ static SDValue LowerVectorCTPOPInRegLUT(
}
static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL,
- const X86Subtarget *Subtarget,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is128BitVector() &&
@@ -20054,7 +20054,7 @@ static SDValue LowerVectorCTPOPBitmath(S
DAG);
}
-static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// FIXME: Need to add AVX-512 support here!
@@ -20063,13 +20063,13 @@ static SDValue LowerVectorCTPOP(SDValue
SDLoc DL(Op.getNode());
SDValue Op0 = Op.getOperand(0);
- if (!Subtarget->hasSSSE3()) {
+ if (!Subtarget.hasSSSE3()) {
// We can't use the fast LUT approach, so fall back on vectorized bitmath.
assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
}
- if (VT.is256BitVector() && !Subtarget->hasInt256()) {
+ if (VT.is256BitVector() && !Subtarget.hasInt256()) {
unsigned NumElems = VT.getVectorNumElements();
// Extract each 128-bit vector, compute pop count and concat the result.
@@ -20084,7 +20084,7 @@ static SDValue LowerVectorCTPOP(SDValue
return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
-static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().isVector() &&
"We only do custom lowering for vector population count.");
@@ -20157,9 +20157,9 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(
Op.getOperand(1), Op.getOperand(2));
}
-static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
+ assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
// which returns the values as { float, float } (in XMM0) or
@@ -20261,9 +20261,9 @@ static SDValue ExtendToType(SDValue InOp
InOp, DAG.getIntPtrConstant(0, dl));
}
-static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Subtarget->hasAVX512() &&
+ assert(Subtarget.hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
// X86 scatter kills mask register, so its type should be added to
@@ -20312,7 +20312,7 @@ static SDValue LowerMSCATTER(SDValue Op,
}
unsigned NumElts = VT.getVectorNumElements();
- if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+ if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// AVX512F supports only 512-bit vectors. Either the data or the index should
// be 512 bits wide. If both the index and the data are currently 256-bit, but

@@ -20355,7 +20355,7 @@ static SDValue LowerMSCATTER(SDValue Op,
return SDValue(NewScatter.getNode(), 0);
}
-static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
@@ -20363,7 +20363,7 @@ static SDValue LowerMLOAD(SDValue Op, co
SDValue Mask = N->getMask();
SDLoc dl(Op);
- if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+ if (Subtarget.hasAVX512() && !Subtarget.hasVLX() &&
!VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bit
@@ -20387,7 +20387,7 @@ static SDValue LowerMLOAD(SDValue Op, co
return Op;
}
-static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
SDValue DataToStore = N->getValue();
@@ -20395,7 +20395,7 @@ static SDValue LowerMSTORE(SDValue Op, c
SDValue Mask = N->getMask();
SDLoc dl(Op);
- if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+ if (Subtarget.hasAVX512() && !Subtarget.hasVLX() &&
!VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bit
@@ -20411,9 +20411,9 @@ static SDValue LowerMSTORE(SDValue Op, c
return Op;
}
-static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Subtarget->hasAVX512() &&
+ assert(Subtarget.hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
@@ -20428,7 +20428,7 @@ static SDValue LowerMGATHER(SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
- if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+ if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// AVX512F supports only 512-bit vectors. Either the data or the index should
// be 512 bits wide. If both the index and the data are currently 256-bit, but
@@ -20656,15 +20656,15 @@ void X86TargetLowering::ReplaceNodeResul
llvm_unreachable("Do not know how to custom type legalize this operation!");
case X86ISD::AVG: {
// Legalize types for X86ISD::AVG by expanding vectors.
- assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
auto InVT = N->getValueType(0);
auto InVTSize = InVT.getSizeInBits();
const unsigned RegSize =
(InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
- assert((!Subtarget->hasAVX512() || RegSize < 512) &&
+ assert((!Subtarget.hasAVX512() || RegSize < 512) &&
"512-bit vector requires AVX512");
- assert((!Subtarget->hasAVX2() || RegSize < 256) &&
+ assert((!Subtarget.hasAVX2() || RegSize < 256) &&
"256-bit vector requires AVX2");
auto ElemVT = InVT.getVectorElementType();
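
The X86ISD::AVG hunk above first picks a register size from the input width and then repeats the subtarget checks as asserts. A small standalone restatement (stand-in flags only, not the real subtarget; the assert conditions are copied verbatim from the hunk) might be:

#include <cassert>

// Toy restatement of the width selection and asserts from the AVG hunk above.
struct Features { bool AVX2; bool AVX512; };

static unsigned pickAvgRegSize(unsigned InVTSizeInBits) {
  // Mirrors: (InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128
  return (InVTSizeInBits > 128) ? ((InVTSizeInBits > 256) ? 512u : 256u) : 128u;
}

static void checkAvgRegSize(const Features &F, unsigned RegSize) {
  assert((!F.AVX512 || RegSize < 512) && "512-bit vector requires AVX512");
  assert((!F.AVX2 || RegSize < 256) && "256-bit vector requires AVX2");
  (void)F;
  (void)RegSize;   // keep release builds warning-free
}

int main() {
  Features AVX2Only = {true, false};
  checkAvgRegSize(AVX2Only, pickAvgRegSize(128));
  return 0;
}
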
@@ -20736,7 +20736,7 @@ void X86TargetLowering::ReplaceNodeResul
return;
}
case ISD::UINT_TO_FP: {
- assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
@@ -20863,7 +20863,7 @@ void X86TargetLowering::ReplaceNodeResul
break;
}
case ISD::BITCAST: {
- assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);
EVT SrcVT = N->getOperand(0)->getValueType(0);
@@ -21154,7 +21154,7 @@ bool X86TargetLowering::isLegalAddressin
if (AM.BaseGV) {
unsigned GVFlags =
- Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+ Subtarget.ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
// If a reference to this global requires an extra load, we can't fold it.
if (isGlobalStubReference(GVFlags))
@@ -21167,7 +21167,7 @@ bool X86TargetLowering::isLegalAddressin
// If lower 4G is not available, then we must use rip-relative addressing.
if ((M != CodeModel::Small || R != Reloc::Static) &&
- Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
@@ -21204,7 +21204,7 @@ bool X86TargetLowering::isVectorShiftByS
// On AVX2 there are new vpsllv[dq] instructions (and other shifts) that make
// variable shifts just as cheap as scalar ones.
- if (Subtarget->hasInt256() && (Bits == 32 || Bits == 64))
+ if (Subtarget.hasInt256() && (Bits == 32 || Bits == 64))
return false;
// Otherwise, it's significantly cheaper to shift by a scalar amount than by a
@@ -21253,12 +21253,12 @@ bool X86TargetLowering::isTruncateFree(E
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
- return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
+ return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit();
}
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
- return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+ return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit();
}
bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
@@ -21289,7 +21289,7 @@ bool X86TargetLowering::isVectorLoadExtD
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- if (!Subtarget->hasAnyFMA())
+ if (!Subtarget.hasAnyFMA())
return false;
VT = VT.getScalarType();
@@ -21478,9 +21478,9 @@ static MachineBasicBlock *EmitPCMPSTRI(M
}
static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// insert input VAL into EAX
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
@@ -21501,9 +21501,9 @@ static MachineBasicBlock *EmitWRPKRU(Mac
}
static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// insert zero to ECX
BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX)
@@ -21519,12 +21519,12 @@ static MachineBasicBlock *EmitRDPKRU(Mac
}
static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// Address into RAX/EAX, other two args into ECX, EDX.
- unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
- unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
for (int i = 0; i < X86::AddrNumOperands; ++i)
MIB.addOperand(MI->getOperand(i));
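
Aside from the reference syntax, the only decision in the EmitMonitor hunk is which address-forming instruction and register to use. A toy sketch of that choice (string names only, purely illustrative, not MC-layer code):

#include <cstdio>

// Toy model of the opcode/register choice in EmitMonitor shown above:
// the address operand goes into RAX via LEA64r in 64-bit mode, else EAX via LEA32r.
struct MonitorAddrInsn { const char *Opcode; const char *Reg; };

static MonitorAddrInsn pickMonitorAddrInsn(bool Is64Bit) {
  return Is64Bit ? MonitorAddrInsn{"LEA64r", "RAX"}
                 : MonitorAddrInsn{"LEA32r", "EAX"};
}

int main() {
  MonitorAddrInsn I = pickMonitorAddrInsn(true);
  std::printf("%s -> %s\n", I.Opcode, I.Reg);
  return 0;
}
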
@@ -21575,7 +21575,7 @@ X86TargetLowering::EmitVAARG64WithCustom
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
// Machine Information
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
@@ -21829,14 +21829,14 @@ X86TargetLowering::EmitVAStartSaveXMMReg
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned CountReg = MI->getOperand(0).getReg();
int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
- if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) {
+ if (!Subtarget.isCallingConvWin64(F->getFunction()->getCallingConv())) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
@@ -21849,7 +21849,7 @@ X86TargetLowering::EmitVAStartSaveXMMReg
!MI->getOperand(MI->getNumOperands() - 1).isReg() ||
MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
&& "Expected last argument to be EFLAGS");
- unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ unsigned MOVOpc = Subtarget.hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
@@ -21944,7 +21944,7 @@ static bool isCMOVPseudo(MachineInstr *M
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -22112,7 +22112,7 @@ X86TargetLowering::EmitLoweredSelect(Mac
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
@@ -22234,7 +22234,7 @@ X86TargetLowering::EmitLoweredAtomicFP(M
case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break;
case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break;
}
- const X86InstrInfo *TII = Subtarget->getInstrInfo();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
MachineOperand MSrc = MI->getOperand(0);
@@ -22267,14 +22267,14 @@ MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
- const bool Is64Bit = Subtarget->is64Bit();
- const bool IsLP64 = Subtarget->isTarget64BitLP64();
+ const bool Is64Bit = Subtarget.is64Bit();
+ const bool IsLP64 = Subtarget.isTarget64BitLP64();
const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
@@ -22308,7 +22308,7 @@ X86TargetLowering::EmitLoweredSegAlloca(
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI->getOperand(1).getReg(),
- physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
+ physSPReg = IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;
MachineFunction::iterator MBBIter = ++BB->getIterator();
@@ -22340,7 +22340,7 @@ X86TargetLowering::EmitLoweredSegAlloca(
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
- Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
+ Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
@@ -22397,9 +22397,9 @@ X86TargetLowering::EmitLoweredSegAlloca(
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
- assert(!Subtarget->isTargetMachO());
+ assert(!Subtarget.isTargetMachO());
DebugLoc DL = MI->getDebugLoc();
- MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe(
+ MachineInstr *ResumeMI = Subtarget.getFrameLowering()->emitStackProbe(
*BB->getParent(), *BB, MI, DL, false);
MachineBasicBlock *ResumeBB = ResumeMI->getParent();
MI->eraseFromParent(); // The pseudo instruction is gone now.
@@ -22410,7 +22410,7 @@ MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB();
DebugLoc DL = MI->getDebugLoc();
@@ -22419,7 +22419,7 @@ X86TargetLowering::EmitLoweredCatchRet(M
"SEH does not use catchret!");
// Only 32-bit EH needs to worry about manually restoring stack pointers.
- if (!Subtarget->is32Bit())
+ if (!Subtarget.is32Bit())
return BB;
// C++ EH creates a new target block to hold the restore code, and wires up
@@ -22445,8 +22445,8 @@ X86TargetLowering::EmitLoweredCatchPad(M
const Constant *PerFn = MF->getFunction()->getPersonalityFn();
bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
// Only 32-bit SEH requires special handling for catchpad.
- if (IsSEH && Subtarget->is32Bit()) {
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+ if (IsSEH && Subtarget.is32Bit()) {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
}
@@ -22462,20 +22462,20 @@ X86TargetLowering::EmitLoweredTLSCall(Ma
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
MachineFunction *F = BB->getParent();
- const X86InstrInfo *TII = Subtarget->getInstrInfo();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
+ assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
- Subtarget->is64Bit() ?
- Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() :
- Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
- if (Subtarget->is64Bit()) {
+ Subtarget.is64Bit() ?
+ Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
+ Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
+ if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
@@ -22519,7 +22519,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
@@ -22591,7 +22591,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
LabelReg = MRI.createVirtualRegister(PtrRC);
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
.addReg(X86::RIP)
.addImm(0)
@@ -22604,7 +22604,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
.addReg(XII->getGlobalBaseReg(MF))
.addImm(0)
.addReg(0)
- .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
+ .addMBB(restoreMBB, Subtarget.ClassifyBlockAddressReference())
.addReg(0);
}
} else
@@ -22626,7 +22626,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
@@ -22645,7 +22645,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
// restoreMBB:
if (RegInfo->hasBasePointer(*MF)) {
const bool Uses64BitFramePtr =
- Subtarget->isTarget64BitLP64() || Subtarget->isTargetNaCl64();
+ Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
X86FI->setRestoreBasePointer(MF);
unsigned FramePtr = RegInfo->getFrameRegister(*MF);
@@ -22668,7 +22668,7 @@ X86TargetLowering::emitEHSjLjLongJmp(Mac
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
@@ -22683,7 +22683,7 @@ X86TargetLowering::emitEHSjLjLongJmp(Mac
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
@@ -22803,7 +22803,7 @@ X86TargetLowering::emitFMA3Instr(Machine
default: llvm_unreachable("Unrecognized FMA variant.");
}
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc))
.addOperand(MI->getOperand(0))
@@ -22873,7 +22873,7 @@ X86TargetLowering::EmitInstrWithCustomIn
case X86::RDFLAGS32:
case X86::RDFLAGS64: {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
unsigned PushF =
MI->getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
unsigned Pop =
@@ -22888,7 +22888,7 @@ X86TargetLowering::EmitInstrWithCustomIn
case X86::WRFLAGS32:
case X86::WRFLAGS64: {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
unsigned Push =
MI->getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
unsigned PopF =
@@ -22914,7 +22914,7 @@ X86TargetLowering::EmitInstrWithCustomIn
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
MachineFunction *F = BB->getParent();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// Change the floating point control register to use "round towards zero"
@@ -22996,9 +22996,9 @@ X86TargetLowering::EmitInstrWithCustomIn
case X86::VPCMPESTRM128REG:
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
- assert(Subtarget->hasSSE42() &&
+ assert(Subtarget.hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRM(MI, BB, Subtarget->getInstrInfo());
+ return EmitPCMPSTRM(MI, BB, Subtarget.getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
@@ -23009,9 +23009,9 @@ X86TargetLowering::EmitInstrWithCustomIn
case X86::VPCMPESTRIREG:
case X86::PCMPESTRIMEM:
case X86::VPCMPESTRIMEM:
- assert(Subtarget->hasSSE42() &&
+ assert(Subtarget.hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRI(MI, BB, Subtarget->getInstrInfo());
+ return EmitPCMPSTRI(MI, BB, Subtarget.getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
@@ -23023,7 +23023,7 @@ X86TargetLowering::EmitInstrWithCustomIn
return EmitRDPKRU(MI, BB, Subtarget);
// xbegin
case X86::XBEGIN:
- return EmitXBegin(MI, BB, Subtarget->getInstrInfo());
+ return EmitXBegin(MI, BB, Subtarget.getInstrInfo());
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
@@ -23186,7 +23186,7 @@ bool X86TargetLowering::isGAPlusOffset(S
/// FIXME: This could be expanded to support 512 bit vectors as well.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget* Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc dl(N);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
@@ -23272,7 +23272,7 @@ static SDValue PerformShuffleCombine256(
static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
int Depth, bool HasPSHUFB, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
// Find the operand that enters the chain. Note that multiple uses are OK
@@ -23324,7 +23324,7 @@ static bool combineX86ShuffleChain(SDVal
// Check if we have SSE3 which will let us use MOVDDUP. That instruction
// is no slower than UNPCKLPD but has the option to fold the input operand
// into even an unaligned memory load.
- if (Lo && Subtarget->hasSSE3()) {
+ if (Lo && Subtarget.hasSSE3()) {
Shuffle = X86ISD::MOVDDUP;
ShuffleVT = MVT::v2f64;
} else {
@@ -23346,7 +23346,7 @@ static bool combineX86ShuffleChain(SDVal
/*AddTo*/ true);
return true;
}
- if (Subtarget->hasSSE3() &&
+ if (Subtarget.hasSSE3() &&
(Mask.equals({0, 0, 2, 2}) || Mask.equals({1, 1, 3, 3}))) {
bool Lo = Mask.equals({0, 0, 2, 2});
unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
@@ -23419,7 +23419,7 @@ static bool combineX86ShuffleChain(SDVal
// can replace them with a single PSHUFB instruction profitably. Intel's
// manuals suggest only using PSHUFB if doing so replaces 5 instructions, but
// in practice PSHUFB tends to be *very* fast so we're more aggressive.
- if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
+ if ((Depth >= 3 || HasPSHUFB) && Subtarget.hasSSSE3()) {
SmallVector<SDValue, 16> PSHUFBMask;
int NumBytes = VT.getSizeInBits() / 8;
int Ratio = NumBytes / Mask.size();
@@ -23484,7 +23484,7 @@ static bool combineX86ShufflesRecursivel
int Depth, bool HasPSHUFB,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
if (Depth > 8)
@@ -23888,7 +23888,7 @@ static bool setTargetShuffleZeroElements
/// \brief Try to combine x86 target specific shuffles.
static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
@@ -24153,12 +24153,12 @@ static SDValue PerformTargetShuffleCombi
/// the operands which explicitly discard the lanes which are unused by this
/// operation to try to flow through the rest of the combiner the fact that
/// they're unused.
-static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget,
+static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
- if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
- (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
+ if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
+ (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
return SDValue();
// We only handle target-independent shuffles.
@@ -24207,7 +24207,7 @@ static SDValue combineShuffleToAddSub(SD
/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -24225,7 +24225,7 @@ static SDValue PerformShuffleCombine(SDN
return AddSub;
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (TLI.isTypeLegal(VT) && Subtarget->hasFp256() && VT.is256BitVector() &&
+ if (TLI.isTypeLegal(VT) && Subtarget.hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
@@ -24418,7 +24418,7 @@ static SDValue XFormVExtractWithShuffleI
}
static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -24447,8 +24447,8 @@ static SDValue PerformBITCASTCombine(SDN
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
}
- if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
- (Subtarget->hasSSE2() && VT == MVT::f64)) &&
+ if (((Subtarget.hasSSE1() && VT == MVT::f32) ||
+ (Subtarget.hasSSE2() && VT == MVT::f64)) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::BITCAST &&
N0.getOperand(0).getOperand(0).getValueType() == VT) {
@@ -24613,7 +24613,7 @@ static SDValue PerformEXTRACT_VECTOR_ELT
static SDValue
transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc dl(N);
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
@@ -24659,7 +24659,7 @@ transformVSELECTtoBlendVECTOR_SHUFFLE(SD
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
@@ -24676,8 +24676,8 @@ static SDValue PerformSELECTCombine(SDNo
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && VT != MVT::f128 &&
(TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
- (Subtarget->hasSSE2() ||
- (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
+ (Subtarget.hasSSE2() ||
+ (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
@@ -24815,7 +24815,7 @@ static SDValue PerformSELECTCombine(SDNo
}
EVT CondVT = Cond.getValueType();
- if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
+ if (Subtarget.hasAVX512() && VT.isVector() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1) {
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on KNL. In this case we convert it to
@@ -24826,7 +24826,7 @@ static SDValue PerformSELECTCombine(SDNo
if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16) &&
- !(Subtarget->hasBWI() && Subtarget->hasVLX())) {
+ !(Subtarget.hasBWI() && Subtarget.hasVLX())) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
@@ -24964,8 +24964,8 @@ static SDValue PerformSELECTCombine(SDNo
// Match VSELECTs into subs with unsigned saturation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
- ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
- (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ ((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
+ (Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
@@ -25119,10 +25119,10 @@ static SDValue PerformSELECTCombine(SDNo
if (VT.getVectorElementType() == MVT::i16)
return SDValue();
// Dynamic blending was only available from SSE4.1 onward.
- if (VT.is128BitVector() && !Subtarget->hasSSE41())
+ if (VT.is128BitVector() && !Subtarget.hasSSE41())
return SDValue();
// Byte blends are only available in AVX2
- if (VT == MVT::v32i8 && !Subtarget->hasAVX2())
+ if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
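
The three early-outs above are the gate for turning a variable VSELECT into a hardware blend. Reduced to a standalone predicate (the flags are stand-ins for Subtarget.hasSSE41() and Subtarget.hasAVX2()):

// Toy model of the VSELECT -> dynamic blend gate in PerformSELECTCombine.
struct Features { bool SSE41; bool AVX2; };

static bool canUseDynamicBlend(const Features &F, unsigned VectorBits,
                               unsigned EltBits, bool IsV32I8) {
  if (EltBits == 16)
    return false;                      // i16 elements are rejected outright
  if (VectorBits == 128 && !F.SSE41)
    return false;                      // dynamic blending needs SSE4.1
  if (IsV32I8 && !F.AVX2)
    return false;                      // 256-bit byte blends need AVX2
  return true;
}

int main() {
  Features SSE41Only = {true, false};
  return canUseDynamicBlend(SSE41Only, 128, 8, false) ? 0 : 1;
}
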
@@ -25350,7 +25350,7 @@ static bool checkBoolTestAndOrSetCCCombi
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
// If the flag operand isn't dead, don't touch this CMOV.
@@ -25763,11 +25763,11 @@ static SDValue PerformSRACombine(SDNode
/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
- (!Subtarget->hasInt256() ||
+ (!Subtarget.hasInt256() ||
(VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
return SDValue();
@@ -25793,7 +25793,7 @@ static SDValue performShiftToAllZeros(SD
/// PerformShiftCombine - Combine shifts.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
if (N->getOpcode() == ISD::SHL)
if (SDValue V = PerformSHLCombine(N, DAG))
return V;
@@ -25815,12 +25815,12 @@ static SDValue PerformShiftCombine(SDNod
// and friends. Likewise for OR -> CMPNEQSS.
static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
unsigned opcode;
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
- if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
@@ -25869,7 +25869,7 @@ static SDValue CMPEQCombine(SDNode *N, S
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
- if (Subtarget->hasAVX512()) {
+ if (Subtarget.hasAVX512()) {
SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00,
CMP01,
DAG.getConstant(x86cc, DL, MVT::i8));
@@ -25886,7 +25886,7 @@ static SDValue CMPEQCombine(SDNode *N, S
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
- if (is64BitFP && !Subtarget->is64Bit()) {
+ if (is64BitFP && !Subtarget.is64Bit()) {
// On a 32-bit target, we cannot bitcast the 64-bit float to a
// 64-bit integer, since that's not a legal type. Since
// OnesOrZeroesF is all ones or all zeroes, we don't need all the
@@ -25949,7 +25949,7 @@ static bool CanFoldXORWithAllOnes(const
// some of the transition sequences.
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.is256BitVector())
return SDValue();
@@ -26028,7 +26028,7 @@ static SDValue WidenMaskArithmetic(SDNod
static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
@@ -26120,7 +26120,7 @@ static SDValue VectorZextCombine(SDNode
/// types, try to convert this into a floating point logic node to avoid
/// unnecessary moves from SSE to integer registers.
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
unsigned FPOpcode = ISD::DELETED_NODE;
if (N->getOpcode() == ISD::AND)
FPOpcode = X86ISD::FAND;
@@ -26137,8 +26137,8 @@ static SDValue convertIntLogicToFPLogic(
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
- ((Subtarget->hasSSE1() && VT == MVT::i32) ||
- (Subtarget->hasSSE2() && VT == MVT::i64))) {
+ ((Subtarget.hasSSE1() && VT == MVT::i32) ||
+ (Subtarget.hasSSE2() && VT == MVT::i64))) {
SDValue N00 = N0.getOperand(0);
SDValue N10 = N1.getOperand(0);
EVT N00Type = N00.getValueType();
@@ -26153,7 +26153,7 @@ static SDValue convertIntLogicToFPLogic(
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -26175,7 +26175,7 @@ static SDValue PerformAndCombine(SDNode
// BEXTR is ((X >> imm) & (2**size-1))
if (VT == MVT::i32 || VT == MVT::i64) {
// Check for BEXTR.
- if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
+ if ((Subtarget.hasBMI() || Subtarget.hasTBM()) &&
(N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -26219,7 +26219,7 @@ static SDValue PerformAndCombine(SDNode
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -26235,8 +26235,8 @@ static SDValue PerformOrCombine(SDNode *
// look for psign/blend
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
- if (!Subtarget->hasSSSE3() ||
- (VT == MVT::v4i64 && !Subtarget->hasInt256()))
+ if (!Subtarget.hasSSSE3() ||
+ (VT == MVT::v4i64 && !Subtarget.hasInt256()))
return SDValue();
// Canonicalize pandn to RHS
@@ -26297,7 +26297,7 @@ static SDValue PerformOrCombine(SDNode *
return DAG.getBitcast(VT, Mask);
}
// PBLENDVB only available on SSE 4.1
- if (!Subtarget->hasSSE41())
+ if (!Subtarget.hasSSE41())
return SDValue();
MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
@@ -26321,7 +26321,7 @@ static SDValue PerformOrCombine(SDNode *
// series of shifts/or that would otherwise be generated.
// Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions
// have higher latencies and we are not optimizing for size.
- if (!OptForSize && Subtarget->isSHLDSlow())
+ if (!OptForSize && Subtarget.isSHLDSlow())
return SDValue();
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
@@ -26460,14 +26460,14 @@ static SDValue foldXorTruncShiftIntoCmp(
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
return RV;
- if (Subtarget->hasCMov())
+ if (Subtarget.hasCMov())
if (SDValue RV = performIntegerAbsCombine(N, DAG))
return RV;
@@ -26481,7 +26481,7 @@ static SDValue PerformXorCombine(SDNode
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
- const X86Subtarget *Subtarget, SDLoc DL) {
+ const X86Subtarget &Subtarget, SDLoc DL) {
if (!VT.isVector() || !VT.isSimple())
return SDValue();
EVT InVT = In.getValueType();
@@ -26498,10 +26498,10 @@ static SDValue detectAVGPattern(SDValue
if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
return SDValue();
- if (Subtarget->hasAVX512()) {
+ if (Subtarget.hasAVX512()) {
if (VT.getSizeInBits() > 512)
return SDValue();
- } else if (Subtarget->hasAVX2()) {
+ } else if (Subtarget.hasAVX2()) {
if (VT.getSizeInBits() > 256)
return SDValue();
} else {
@@ -26600,7 +26600,7 @@ static SDValue detectAVGPattern(SDValue
/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
LoadSDNode *Ld = cast<LoadSDNode>(N);
EVT RegVT = Ld->getValueType(0);
EVT MemVT = Ld->getMemoryVT();
@@ -26652,7 +26652,7 @@ static SDValue PerformLOADCombine(SDNode
/// PerformMLOADCombine - Resolve extending loads
static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
if (Mld->getExtensionType() != ISD::SEXTLOAD)
return SDValue();
@@ -26731,7 +26731,7 @@ static SDValue PerformMLOADCombine(SDNod
}
/// PerformMSTORECombine - Resolve truncating stores
static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
if (!Mst->isTruncatingStore())
return SDValue();
@@ -26820,7 +26820,7 @@ static SDValue PerformMSTORECombine(SDNo
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
@@ -26965,9 +26965,9 @@ static SDValue PerformSTORECombine(SDNod
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal =
- !Subtarget->useSoftFloat() && !NoImplicitFloatOps && Subtarget->hasSSE2();
+ !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
if ((VT.isVector() ||
- (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
+ (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
@@ -27006,8 +27006,8 @@ static SDValue PerformSTORECombine(SDNod
// If we are a 64-bit capable x86, lower to a single movq load/store pair.
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
- if (Subtarget->is64Bit() || F64IsLegal) {
- MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ if (Subtarget.is64Bit() || F64IsLegal) {
+ MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
@@ -27067,7 +27067,7 @@ static SDValue PerformSTORECombine(SDNod
// to get past legalization. The execution dependencies fixup pass will
// choose the optimal machine instruction for the store if this really is
// an integer or v2f32 rather than an f64.
- if (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit() &&
+ if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue OldExtract = St->getOperand(1);
SDValue ExtOp0 = OldExtract.getOperand(0);
@@ -27212,14 +27212,14 @@ static bool isHorizontalBinOp(SDValue &L
/// Do target-specific dag combines on floating point adds.
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget.hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, SDLoc(N), VT, LHS, RHS);
return SDValue();
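
The FHADD gate above combines a feature check with a type check; the same logic, restated as a small standalone predicate (the enum and flags are stand-ins, not LLVM types):

// Toy restatement of the gate for folding shuffled fadds into X86ISD::FHADD.
struct Features { bool SSE3; bool Fp256; };

enum class VecTy { v4f32, v2f64, v8f32, v4f64, Other };

static bool canFormFHAdd(const Features &F, VecTy VT) {
  bool Is128 = (VT == VecTy::v4f32 || VT == VecTy::v2f64);
  bool Is256 = (VT == VecTy::v8f32 || VT == VecTy::v4f64);
  // The real combine additionally requires isHorizontalBinOp(LHS, RHS, true).
  return (F.SSE3 && Is128) || (F.Fp256 && Is256);
}

int main() {
  Features AVX = {true, true};
  return canFormFHAdd(AVX, VecTy::v8f32) ? 0 : 1;
}
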
@@ -27227,14 +27227,14 @@ static SDValue PerformFADDCombine(SDNode
/// Do target-specific dag combines on floating point subs.
static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal subs from subs of shuffles.
- if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget.hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, SDLoc(N), VT, LHS, RHS);
return SDValue();
@@ -27327,7 +27327,7 @@ combineVectorTruncationWithPACKSS(SDNode
/// element that is extracted from a vector and then truncated, and it is
/// difficult to do this optimization based on them.
static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT OutVT = N->getValueType(0);
if (!OutVT.isVector())
return SDValue();
@@ -27342,7 +27342,7 @@ static SDValue combineVectorTruncation(S
// TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on
// SSE2, and we need to take care of it specially.
// AVX512 provides vpmovdb.
- if (!Subtarget->hasSSE2() || Subtarget->hasAVX2())
+ if (!Subtarget.hasSSE2() || Subtarget.hasAVX2())
return SDValue();
EVT OutSVT = OutVT.getVectorElementType();
@@ -27353,7 +27353,7 @@ static SDValue combineVectorTruncation(S
return SDValue();
// SSSE3's pshufb results in fewer instructions in the cases below.
- if (Subtarget->hasSSSE3() && NumElems == 8 &&
+ if (Subtarget.hasSSSE3() && NumElems == 8 &&
((OutSVT == MVT::i8 && InSVT != MVT::i64) ||
(InSVT == MVT::i32 && OutSVT == MVT::i16)))
return SDValue();
@@ -27373,7 +27373,7 @@ static SDValue combineVectorTruncation(S
// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
// truncate 2 x v4i32 to v8i16.
- if (Subtarget->hasSSE41() || OutSVT == MVT::i8)
+ if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
else if (InSVT == MVT::i32)
return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
@@ -27382,7 +27382,7 @@ static SDValue combineVectorTruncation(S
}
static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
// Try to detect AVG pattern first.
SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG,
Subtarget, SDLoc(N));
@@ -27394,7 +27394,7 @@ static SDValue PerformTRUNCATECombine(SD
/// Do target-specific dag combines on floating point negations.
static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
SDValue Arg = N->getOperand(0);
@@ -27408,7 +27408,7 @@ static SDValue PerformFNEGCombine(SDNode
// use of a constant by performing (-0 - A*B) instead.
// FIXME: Check rounding control flags as well once it becomes available.
if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
- Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {
+ Arg->getFlags()->hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
Arg.getOperand(1), Zero);
@@ -27436,9 +27436,9 @@ static SDValue PerformFNEGCombine(SDNode
}
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
- if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+ if (VT.is512BitVector() && !Subtarget.hasDQI()) {
// VXORPS, VORPS, VANDPS, VANDNPS are supported only under the DQ extension.
// These logic operations may be executed in the integer domain.
SDLoc dl(N);
@@ -27462,7 +27462,7 @@ static SDValue lowerX86FPLogicOp(SDNode
}
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
@@ -27500,8 +27500,8 @@ static SDValue PerformFMinFMaxCombine(SD
}
static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- if (Subtarget->useSoftFloat())
+ const X86Subtarget &Subtarget) {
+ if (Subtarget.useSoftFloat())
return SDValue();
// TODO: Check for global or instruction-level "nnan". In that case, we
@@ -27510,9 +27510,9 @@ static SDValue performFMinNumFMaxNumComb
// should be an optional swap and FMAX/FMIN.
EVT VT = N->getValueType(0);
- if (!((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
- (Subtarget->hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
+ if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
+ (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
return SDValue();
// This takes at least 3 instructions, so favor a library call when operating
@@ -27557,7 +27557,7 @@ static SDValue performFMinNumFMaxNumComb
/// Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
// FAND(0.0, x) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
@@ -27573,7 +27573,7 @@ static SDValue PerformFANDCombine(SDNode
/// Do target-specific dag combines on X86ISD::FANDN nodes
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
// FANDN(0.0, x) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
@@ -27620,7 +27620,7 @@ static SDValue PerformVZEXT_MOVLCombine(
}
static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
@@ -27641,7 +27641,7 @@ static SDValue PerformSIGN_EXTEND_INREGC
// EXTLOAD has a better solution on AVX2:
// it may be replaced with an X86ISD::VSEXT node.
- if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+ if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256())
if (!ISD::isNormalLoad(N00.getNode()))
return SDValue();
@@ -27659,7 +27659,7 @@ static SDValue PerformSIGN_EXTEND_INREGC
/// to combine math ops, use an LEA, or use a complex addressing mode. This can
/// eliminate extend, add, and shift instructions.
static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
// TODO: This should be valid for other integer types.
EVT VT = Sext->getValueType(0);
if (VT != MVT::i64)
@@ -27733,7 +27733,7 @@ static SDValue getDivRem8(SDNode *N, Sel
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
@@ -27754,7 +27754,7 @@ static SDValue PerformSExtCombine(SDNode
return SDValue();
}
- if (VT.isVector() && Subtarget->hasSSE2()) {
+ if (VT.isVector() && Subtarget.hasSSE2()) {
auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
EVT InVT = N.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
@@ -27790,7 +27790,7 @@ static SDValue PerformSExtCombine(SDNode
// On pre-AVX2 targets, split into 128-bit nodes of
// ISD::SIGN_EXTEND_VECTOR_INREG.
- if (!Subtarget->hasInt256() && !(VT.getSizeInBits() % 128) &&
+ if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128) &&
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
unsigned NumVecs = VT.getSizeInBits() / 128;
@@ -27811,7 +27811,7 @@ static SDValue PerformSExtCombine(SDNode
}
}
- if (Subtarget->hasAVX() && VT.is256BitVector())
+ if (Subtarget.hasAVX() && VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
@@ -27822,7 +27822,7 @@ static SDValue PerformSExtCombine(SDNode
}
static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget* Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -27831,7 +27831,7 @@ static SDValue PerformFMACombine(SDNode
return SDValue();
EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasAnyFMA())
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
return SDValue();
SDValue A = N->getOperand(0);
@@ -27862,7 +27862,7 @@ static SDValue PerformFMACombine(SDNode
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
// This eliminates the zext. This transformation is necessary because
@@ -27910,7 +27910,7 @@ static SDValue PerformZExtCombine(SDNode
// Optimize x == -y --> x+y == 0
// x != -y --> x+y != 0
static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget* Subtarget) {
+ const X86Subtarget &Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -28003,7 +28003,7 @@ static SDValue MaterializeSETB(SDLoc DL,
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
@@ -28044,7 +28044,7 @@ static SDValue PerformSETCCCombine(SDNod
//
static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
@@ -28107,7 +28107,7 @@ static SDValue performVectorCompareAndMa
}
static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
@@ -28132,7 +28132,7 @@ static SDValue PerformUINT_TO_FPCombine(
}
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
@@ -28156,7 +28156,7 @@ static SDValue PerformSINT_TO_FPCombine(
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
- if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
+ if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT LdVT = Ld->getValueType(0);
@@ -28166,8 +28166,8 @@ static SDValue PerformSINT_TO_FPCombine(
if (!Ld->isVolatile() && !VT.isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
- !Subtarget->is64Bit() && LdVT == MVT::i64) {
- SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
+ !Subtarget.is64Bit() && LdVT == MVT::i64) {
+ SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD(
SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
@@ -28245,14 +28245,14 @@ static SDValue OptimizeConditionalInDecr
/// PerformADDCombine - Do target-specific dag combines on integer adds.
static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
- (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
@@ -28260,7 +28260,7 @@ static SDValue PerformAddCombine(SDNode
}
static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -28284,8 +28284,8 @@ static SDValue PerformSubCombine(SDNode
// Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
- if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
- (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
@@ -28295,7 +28295,7 @@ static SDValue PerformSubCombine(SDNode
/// performVZEXTCombine - Performs build vector combines
static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
MVT VT = N->getSimpleValueType(0);
SDValue Op = N->getOperand(0);
@@ -28738,13 +28738,13 @@ TargetLowering::ConstraintWeight
weight = CW_SpecificReg;
break;
case 'y':
- if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ if (type->isX86_MMXTy() && Subtarget.hasMMX())
weight = CW_SpecificReg;
break;
case 'x':
case 'Y':
- if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
- ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
+ if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
+ ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasFp256()))
weight = CW_Register;
break;
case 'I':
@@ -28814,9 +28814,9 @@ LowerXConstraint(EVT ConstraintVT) const
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
- if (Subtarget->hasSSE2())
+ if (Subtarget.hasSSE2())
return "Y";
- if (Subtarget->hasSSE1())
+ if (Subtarget.hasSSE1())
return "x";
}
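(Aside, not part of the patch: an illustrative inline-asm snippet that exercises the 'x' constraint handled above; with SSE1 available the operands are allocated to %xmm registers.)

// Illustrative only: 'x' asks for an SSE register, so the addss below
// operates on %xmm registers when SSE1 is enabled.
static inline float add_ss(float a, float b) {
  asm("addss %1, %0" : "+x"(a) : "x"(b));
  return a;
}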
@@ -28867,7 +28867,7 @@ void X86TargetLowering::LowerAsmOperandF
case 'L':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
- (Subtarget->is64Bit() && C->getZExtValue() == 0xffffffff)) {
+ (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType());
break;
@@ -28940,7 +28940,7 @@ void X86TargetLowering::LowerAsmOperandF
// In any sort of PIC mode addresses need to be computed at runtime by
// adding in a register or some sort of table lookup. These can't
// be used as immediates.
- if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+ if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC())
return;
// If we are in non-pic codegen mode, we allow the address of a global (with
@@ -28975,7 +28975,7 @@ void X86TargetLowering::LowerAsmOperandF
// If we require an extra load to get this address, as in PIC mode, we
// can't accept it.
if (isGlobalStubReference(
- Subtarget->ClassifyGlobalReference(GV, DAG.getTarget())))
+ Subtarget.ClassifyGlobalReference(GV, DAG.getTarget())))
return;
Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
@@ -29005,7 +29005,7 @@ X86TargetLowering::getRegForInlineAsmCon
// RIP in the class. Do they matter any more here than they do
// in the normal allocation?
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
- if (Subtarget->is64Bit()) {
+ if (Subtarget.is64Bit()) {
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32RegClass);
if (VT == MVT::i16)
@@ -29033,7 +29033,7 @@ X86TargetLowering::getRegForInlineAsmCon
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
- if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit())
return std::make_pair(0U, &X86::GR32RegClass);
return std::make_pair(0U, &X86::GR64RegClass);
case 'R': // LEGACY_REGS
@@ -29041,7 +29041,7 @@ X86TargetLowering::getRegForInlineAsmCon
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREXRegClass);
- if (VT == MVT::i32 || !Subtarget->is64Bit())
+ if (VT == MVT::i32 || !Subtarget.is64Bit())
return std::make_pair(0U, &X86::GR32_NOREXRegClass);
return std::make_pair(0U, &X86::GR64_NOREXRegClass);
case 'f': // FP Stack registers.
@@ -29053,13 +29053,13 @@ X86TargetLowering::getRegForInlineAsmCon
return std::make_pair(0U, &X86::RFP64RegClass);
return std::make_pair(0U, &X86::RFP80RegClass);
case 'y': // MMX_REGS if MMX allowed.
- if (!Subtarget->hasMMX()) break;
+ if (!Subtarget.hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'Y': // SSE_REGS if SSE2 allowed
- if (!Subtarget->hasSSE2()) break;
+ if (!Subtarget.hasSSE2()) break;
// FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
- if (!Subtarget->hasSSE1()) break;
+ if (!Subtarget.hasSSE1()) break;
switch (VT.SimpleTy) {
default: break;
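(Aside, not part of the patch: the register class chosen for 'x' also depends on the operand's value type, as the switch over VT.SimpleTy above suggests. A hedged example, illustrative only and requiring -mavx, where a 256-bit operand lands in a %ymm register:)

#include <immintrin.h>

// Illustrative only: with a 256-bit vector operand and AVX enabled, the
// 'x' constraint resolves to a %ymm register.
static inline __m256 double_elts(__m256 v) {
  asm("vaddps %0, %0, %0" : "+x"(v));
  return v;
}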
@@ -29242,7 +29242,7 @@ bool X86TargetLowering::isIntDivCheap(EV
}
void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
- if (!Subtarget->is64Bit())
+ if (!Subtarget.is64Bit())
return;
// Update IsSplitCSR in X86MachineFunctionInfo.
@@ -29254,12 +29254,12 @@ void X86TargetLowering::initializeSplitC
void X86TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
- const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=258867&r1=258866&r2=258867&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Jan 26 16:08:58 2016
@@ -955,9 +955,9 @@ namespace llvm {
MVT VT) const override;
private:
- /// Keep a pointer to the X86Subtarget around so that we can
+ /// Keep a reference to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
- const X86Subtarget *Subtarget;
+ const X86Subtarget &Subtarget;
/// Select between SSE or x87 floating point ops.
/// When SSE is available, use it for f32 operations.
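(Aside, not part of the patch: a standalone sketch with hypothetical names of the member pattern adopted here. A const reference member is bound once in the constructor's initializer list, cannot be null or reseated, and is accessed with '.' rather than '->'.)

// Hypothetical illustration only.
struct Subtarget {
  bool is64Bit() const { return true; }
};

class Lowering {
  const Subtarget &STI;  // reference member: initialized once, never null
public:
  explicit Lowering(const Subtarget &S) : STI(S) {}
  bool wide() const { return STI.is64Bit(); }  // '.' access, no null check
};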