[llvm] r258867 - [x86] make the subtarget member a const reference, not a pointer; NFCI

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 26 14:08:58 PST 2016


Author: spatel
Date: Tue Jan 26 16:08:58 2016
New Revision: 258867

URL: http://llvm.org/viewvc/llvm-project?rev=258867&view=rev
Log:
[x86] make the subtarget member a const reference, not a pointer; NFCI

It's passed in as a reference; it's not optional; it's not a pointer.
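
For readers skimming the diff, here is a minimal sketch of the idiom being applied, using hypothetical Widget/Config names rather than the real X86TargetLowering/X86Subtarget classes: when a constructor is handed a required reference that is never reseated, the class can store a const reference member directly instead of taking its address, which turns every "->" member access into ".".

    #include <iostream>

    // Stand-in for a subtarget-style object that is created elsewhere
    // and outlives the class that uses it.
    struct Config {
      bool fastPath() const { return true; }
    };

    class Widget {
      const Config &Cfg;   // was: const Config *Cfg;
    public:
      // was: explicit Widget(const Config &C) : Cfg(&C) {}
      explicit Widget(const Config &C) : Cfg(C) {}
      // was: return Cfg->fastPath();
      bool useFastPath() const { return Cfg.fastPath(); }
    };

    int main() {
      Config C;
      Widget W(C);
      std::cout << W.useFastPath() << '\n';   // prints 1 either way
      return 0;
    }

Compiled as a standalone program, this behaves identically with either member type, which is what the NFCI ("no functional change intended") tag in the title asserts for the real patch.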


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=258867&r1=258866&r2=258867&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 26 16:08:58 2016
@@ -71,9 +71,9 @@ static cl::opt<bool> ExperimentalVectorW
 
 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                      const X86Subtarget &STI)
-    : TargetLowering(TM), Subtarget(&STI) {
-  X86ScalarSSEf64 = Subtarget->hasSSE2();
-  X86ScalarSSEf32 = Subtarget->hasSSE1();
+    : TargetLowering(TM), Subtarget(STI) {
+  X86ScalarSSEf64 = Subtarget.hasSSE2();
+  X86ScalarSSEf32 = Subtarget.hasSSE1();
   MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
 
   // Set up the TargetLowering object.
@@ -86,24 +86,24 @@ X86TargetLowering::X86TargetLowering(con
   // For 64-bit, since we have so many registers, use the ILP scheduler.
   // For 32-bit, use the register pressure specific scheduling.
   // For Atom, always use ILP scheduling.
-  if (Subtarget->isAtom())
+  if (Subtarget.isAtom())
     setSchedulingPreference(Sched::ILP);
-  else if (Subtarget->is64Bit())
+  else if (Subtarget.is64Bit())
     setSchedulingPreference(Sched::ILP);
   else
     setSchedulingPreference(Sched::RegPressure);
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
   // Bypass expensive divides on Atom when compiling with O2.
   if (TM.getOptLevel() >= CodeGenOpt::Default) {
-    if (Subtarget->hasSlowDivide32())
+    if (Subtarget.hasSlowDivide32())
       addBypassSlowDiv(32, 8);
-    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
+    if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
       addBypassSlowDiv(64, 16);
   }
 
-  if (Subtarget->isTargetKnownWindowsMSVC()) {
+  if (Subtarget.isTargetKnownWindowsMSVC()) {
     // Setup Windows compiler runtime calls.
     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
@@ -117,11 +117,11 @@ X86TargetLowering::X86TargetLowering(con
     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
   }
 
-  if (Subtarget->isTargetDarwin()) {
+  if (Subtarget.isTargetDarwin()) {
     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
     setUseUnderscoreSetJmp(false);
     setUseUnderscoreLongJmp(false);
-  } else if (Subtarget->isTargetWindowsGNU()) {
+  } else if (Subtarget.isTargetWindowsGNU()) {
     // MS runtime is weird: it exports _setjmp, but longjmp!
     setUseUnderscoreSetJmp(true);
     setUseUnderscoreLongJmp(false);
@@ -134,7 +134,7 @@ X86TargetLowering::X86TargetLowering(con
   addRegisterClass(MVT::i8, &X86::GR8RegClass);
   addRegisterClass(MVT::i16, &X86::GR16RegClass);
   addRegisterClass(MVT::i32, &X86::GR32RegClass);
-  if (Subtarget->is64Bit())
+  if (Subtarget.is64Bit())
     addRegisterClass(MVT::i64, &X86::GR64RegClass);
 
   for (MVT VT : MVT::integer_valuetypes())
@@ -164,14 +164,14 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
   setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
 
-  if (Subtarget->is64Bit()) {
-    if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
+  if (Subtarget.is64Bit()) {
+    if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
       // f32/f64 are legal, f80 is custom.
       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Custom);
     else
       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
-  } else if (!Subtarget->useSoftFloat()) {
+  } else if (!Subtarget.useSoftFloat()) {
     // We have an algorithm for SSE2->double, and we turn this into a
     // 64-bit FILD followed by conditional FADD for other targets.
     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
@@ -185,7 +185,7 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
 
-  if (!Subtarget->useSoftFloat()) {
+  if (!Subtarget.useSoftFloat()) {
     // SSE has no i16 to fp conversion, only i32
     if (X86ScalarSSEf32) {
       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
@@ -205,7 +205,7 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
 
-  if (!Subtarget->useSoftFloat()) {
+  if (!Subtarget.useSoftFloat()) {
     // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
     // are Legal, f80 is custom lowered.
     setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
@@ -231,8 +231,8 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
   setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
 
-  if (Subtarget->is64Bit()) {
-    if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
+  if (Subtarget.is64Bit()) {
+    if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
       // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
       setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Custom);
@@ -240,9 +240,9 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
       setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Expand);
     }
-  } else if (!Subtarget->useSoftFloat()) {
+  } else if (!Subtarget.useSoftFloat()) {
     // Since AVX is a superset of SSE3, only check for SSE here.
-    if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
+    if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
       // Expand FP_TO_UINT into a select.
       // FIXME: We would like to use a Custom expander here eventually to do
       // the optimal thing for SSE vs. the default expansion in the legalizer.
@@ -260,12 +260,12 @@ X86TargetLowering::X86TargetLowering(con
   if (!X86ScalarSSEf64) {
     setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
     setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
-    if (Subtarget->is64Bit()) {
+    if (Subtarget.is64Bit()) {
       setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
       // Without SSE, i64->f64 goes through memory.
       setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
     }
-  } else if (!Subtarget->is64Bit())
+  } else if (!Subtarget.is64Bit())
     setOperationAction(ISD::BITCAST      , MVT::i64  , Custom);
 
   // Scalar integer divide and remainder are lowered to use operations that
@@ -311,14 +311,14 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::SELECT_CC        , MVT::i16,   Expand);
   setOperationAction(ISD::SELECT_CC        , MVT::i32,   Expand);
   setOperationAction(ISD::SELECT_CC        , MVT::i64,   Expand);
-  if (Subtarget->is64Bit())
+  if (Subtarget.is64Bit())
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
 
-  if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
+  if (Subtarget.is32Bit() && Subtarget.isTargetKnownWindowsMSVC()) {
     // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
     // is. We should promote the value to 64-bits to solve this.
     // This is what the CRT headers do - `fmodf` is an inline header
@@ -338,19 +338,19 @@ X86TargetLowering::X86TargetLowering(con
   AddPromotedToType (ISD::CTTZ             , MVT::i8   , MVT::i32);
   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , Promote);
   AddPromotedToType (ISD::CTTZ_ZERO_UNDEF  , MVT::i8   , MVT::i32);
-  if (Subtarget->hasBMI()) {
+  if (Subtarget.hasBMI()) {
     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Expand);
     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Expand);
-    if (Subtarget->is64Bit())
+    if (Subtarget.is64Bit())
       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
   } else {
     setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
-    if (Subtarget->is64Bit())
+    if (Subtarget.is64Bit())
       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
   }
 
-  if (Subtarget->hasLZCNT()) {
+  if (Subtarget.hasLZCNT()) {
     // When promoting the i8 variants, force them to i32 for a shorter
     // encoding.
     setOperationAction(ISD::CTLZ           , MVT::i8   , Promote);
@@ -359,7 +359,7 @@ X86TargetLowering::X86TargetLowering(con
     AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Expand);
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Expand);
-    if (Subtarget->is64Bit())
+    if (Subtarget.is64Bit())
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
   } else {
     setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
@@ -368,7 +368,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
-    if (Subtarget->is64Bit()) {
+    if (Subtarget.is64Bit()) {
       setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
     }
@@ -377,7 +377,7 @@ X86TargetLowering::X86TargetLowering(con
   // Special handling for half-precision floating point conversions.
   // If we don't have F16C support, then lower half float conversions
   // into library calls.
-  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
+  if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
   }
@@ -395,19 +395,19 @@ X86TargetLowering::X86TargetLowering(con
   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
   setTruncStoreAction(MVT::f80, MVT::f16, Expand);
 
-  if (Subtarget->hasPOPCNT()) {
+  if (Subtarget.hasPOPCNT()) {
     setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
   } else {
     setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
     setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
     setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
-    if (Subtarget->is64Bit())
+    if (Subtarget.is64Bit())
       setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
   }
 
   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
 
-  if (!Subtarget->hasMOVBE())
+  if (!Subtarget.hasMOVBE())
     setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);
 
   // These should be promoted to a larger select which is supported.
@@ -430,7 +430,7 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::SETCCE          , MVT::i8   , Custom);
   setOperationAction(ISD::SETCCE          , MVT::i16  , Custom);
   setOperationAction(ISD::SETCCE          , MVT::i32  , Custom);
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
     setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
     setOperationAction(ISD::SETCCE        , MVT::i64  , Custom);
@@ -450,11 +450,11 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
   setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
   setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
-  if (Subtarget->is64Bit())
+  if (Subtarget.is64Bit())
     setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
   setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
   setOperationAction(ISD::BlockAddress    , MVT::i32  , Custom);
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
     setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
     setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
@@ -465,13 +465,13 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
   setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
   setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     setOperationAction(ISD::SHL_PARTS     , MVT::i64  , Custom);
     setOperationAction(ISD::SRA_PARTS     , MVT::i64  , Custom);
     setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
   }
 
-  if (Subtarget->hasSSE1())
+  if (Subtarget.hasSSE1())
     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
 
   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
@@ -483,13 +483,13 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
   }
 
-  if (Subtarget->hasCmpxchg16b()) {
+  if (Subtarget.hasCmpxchg16b()) {
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
   }
 
   // FIXME - use subtarget debug flags
-  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
-      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
+  if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
+      !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64()) {
     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
   }
 
@@ -505,7 +505,7 @@ X86TargetLowering::X86TargetLowering(con
   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     setOperationAction(ISD::VAARG           , MVT::Other, Custom);
     setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
   } else {
@@ -523,7 +523,7 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
   setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
 
-  if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
+  if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
     // f32 and f64 use SSE.
     // Set up the FP register classes.
     addRegisterClass(MVT::f32, &X86::FR32RegClass);
@@ -557,7 +557,7 @@ X86TargetLowering::X86TargetLowering(con
     // cases we handle.
     addLegalFPImmediate(APFloat(+0.0)); // xorpd
     addLegalFPImmediate(APFloat(+0.0f)); // xorps
-  } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
+  } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32) {
     // Use SSE for f32, x87 for f64.
     // Set up the FP register classes.
     addRegisterClass(MVT::f32, &X86::FR32RegClass);
@@ -592,7 +592,7 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
     }
-  } else if (!Subtarget->useSoftFloat()) {
+  } else if (!Subtarget.useSoftFloat()) {
     // f32 and f64 in x87.
     // Set up the FP register classes.
     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
@@ -626,8 +626,8 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::FMA, MVT::f32, Expand);
 
   // Long double always uses X87, except f128 in MMX.
-  if (!Subtarget->useSoftFloat()) {
-    if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
+  if (!Subtarget.useSoftFloat()) {
+    if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
       addRegisterClass(MVT::f128, &X86::FR128RegClass);
       ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
       setOperationAction(ISD::FABS , MVT::f128, Custom);
@@ -774,7 +774,7 @@ X86TargetLowering::X86TargetLowering(con
 
   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
   // with -msoft-float, disable use of MMX as well.
-  if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
     addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
     // No operations on x86mmx supported, everything uses intrinsics.
   }
@@ -792,7 +792,7 @@ X86TargetLowering::X86TargetLowering(con
   }
   setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
     addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
 
     setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
@@ -811,7 +811,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Custom);
   }
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
     addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
 
     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
@@ -908,7 +908,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
 
-    if (Subtarget->is64Bit()) {
+    if (Subtarget.is64Bit()) {
       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
     }
@@ -942,7 +942,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
     // As there is no 64-bit GPR available, we need build a special custom
     // sequence to convert from v2i32 to v2f32.
-    if (!Subtarget->is64Bit())
+    if (!Subtarget.is64Bit())
       setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);
 
     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
@@ -956,7 +956,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
   }
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
     for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
       setOperationAction(ISD::FFLOOR,           RoundedTy,  Legal);
       setOperationAction(ISD::FCEIL,            RoundedTy,  Legal);
@@ -1020,13 +1020,13 @@ X86TargetLowering::X86TargetLowering(con
 
     // FIXME: these should be Legal, but that's only for the case where
     // the index is constant.  For now custom expand to deal with that.
-    if (Subtarget->is64Bit()) {
+    if (Subtarget.is64Bit()) {
       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
     }
   }
 
-  if (Subtarget->hasSSE2()) {
+  if (Subtarget.hasSSE2()) {
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
@@ -1052,7 +1052,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
   }
 
-  if (Subtarget->hasXOP()) {
+  if (Subtarget.hasXOP()) {
     setOperationAction(ISD::ROTL,              MVT::v16i8, Custom);
     setOperationAction(ISD::ROTL,              MVT::v8i16, Custom);
     setOperationAction(ISD::ROTL,              MVT::v4i32, Custom);
@@ -1063,7 +1063,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::ROTL,              MVT::v4i64, Custom);
   }
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
     addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
     addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
     addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
@@ -1162,7 +1162,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::CTTZ_ZERO_UNDEF,   MVT::v8i32, Custom);
     setOperationAction(ISD::CTTZ_ZERO_UNDEF,   MVT::v4i64, Custom);
 
-    if (Subtarget->hasAnyFMA()) {
+    if (Subtarget.hasAnyFMA()) {
       setOperationAction(ISD::FMA,             MVT::v8f32, Legal);
       setOperationAction(ISD::FMA,             MVT::v4f64, Legal);
       setOperationAction(ISD::FMA,             MVT::v4f32, Legal);
@@ -1171,7 +1171,7 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::FMA,             MVT::f64, Legal);
     }
 
-    if (Subtarget->hasInt256()) {
+    if (Subtarget.hasInt256()) {
       setOperationAction(ISD::ADD,             MVT::v4i64, Legal);
       setOperationAction(ISD::ADD,             MVT::v8i32, Legal);
       setOperationAction(ISD::ADD,             MVT::v16i16, Legal);
@@ -1289,7 +1289,7 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
     }
 
-    if (Subtarget->hasInt256())
+    if (Subtarget.hasInt256())
       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
 
     // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
@@ -1307,7 +1307,7 @@ X86TargetLowering::X86TargetLowering(con
     }
   }
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
     addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
     addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
@@ -1388,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(con
     setTruncStoreAction(MVT::v8i64,   MVT::v8i32,  Legal);
     setTruncStoreAction(MVT::v16i32,  MVT::v16i8,  Legal);
     setTruncStoreAction(MVT::v16i32,  MVT::v16i16, Legal);
-    if (Subtarget->hasVLX()){
+    if (Subtarget.hasVLX()){
       setTruncStoreAction(MVT::v4i64, MVT::v4i8,  Legal);
       setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
       setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
@@ -1411,7 +1411,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i1,  Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v16i1, Custom);
-    if (Subtarget->hasDQI()) {
+    if (Subtarget.hasDQI()) {
       setOperationAction(ISD::TRUNCATE,         MVT::v2i1, Custom);
       setOperationAction(ISD::TRUNCATE,         MVT::v4i1, Custom);
 
@@ -1419,7 +1419,7 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::UINT_TO_FP,       MVT::v8i64, Legal);
       setOperationAction(ISD::FP_TO_SINT,       MVT::v8i64, Legal);
       setOperationAction(ISD::FP_TO_UINT,       MVT::v8i64, Legal);
-      if (Subtarget->hasVLX()) {
+      if (Subtarget.hasVLX()) {
         setOperationAction(ISD::SINT_TO_FP,    MVT::v4i64, Legal);
         setOperationAction(ISD::SINT_TO_FP,    MVT::v2i64, Legal);
         setOperationAction(ISD::UINT_TO_FP,    MVT::v4i64, Legal);
@@ -1430,7 +1430,7 @@ X86TargetLowering::X86TargetLowering(con
         setOperationAction(ISD::FP_TO_UINT,    MVT::v2i64, Legal);
       }
     }
-    if (Subtarget->hasVLX()) {
+    if (Subtarget.hasVLX()) {
       setOperationAction(ISD::SINT_TO_FP,       MVT::v8i32, Legal);
       setOperationAction(ISD::UINT_TO_FP,       MVT::v8i32, Legal);
       setOperationAction(ISD::FP_TO_SINT,       MVT::v8i32, Legal);
@@ -1452,7 +1452,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
-    if (Subtarget->hasDQI()) {
+    if (Subtarget.hasDQI()) {
       setOperationAction(ISD::SIGN_EXTEND,        MVT::v4i32, Custom);
       setOperationAction(ISD::SIGN_EXTEND,        MVT::v2i64, Custom);
     }
@@ -1524,7 +1524,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::OR,                 MVT::v16i32, Legal);
     setOperationAction(ISD::XOR,                MVT::v16i32, Legal);
 
-    if (Subtarget->hasCDI()) {
+    if (Subtarget.hasCDI()) {
       setOperationAction(ISD::CTLZ,             MVT::v8i64,  Legal);
       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
       setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v8i64,  Expand);
@@ -1542,7 +1542,7 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v8i64,  Custom);
       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v16i32, Custom);
 
-      if (Subtarget->hasVLX()) {
+      if (Subtarget.hasVLX()) {
         setOperationAction(ISD::CTLZ,             MVT::v4i64, Legal);
         setOperationAction(ISD::CTLZ,             MVT::v8i32, Legal);
         setOperationAction(ISD::CTLZ,             MVT::v2i64, Legal);
@@ -1566,9 +1566,9 @@ X86TargetLowering::X86TargetLowering(con
         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v2i64, Expand);
         setOperationAction(ISD::CTLZ_ZERO_UNDEF,  MVT::v4i32, Expand);
       }
-    } // Subtarget->hasCDI()
+    } // Subtarget.hasCDI()
 
-    if (Subtarget->hasDQI()) {
+    if (Subtarget.hasDQI()) {
       setOperationAction(ISD::MUL,             MVT::v2i64, Legal);
       setOperationAction(ISD::MUL,             MVT::v4i64, Legal);
       setOperationAction(ISD::MUL,             MVT::v8i64, Legal);
@@ -1617,7 +1617,7 @@ X86TargetLowering::X86TargetLowering(con
     }
   }// has  AVX-512
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
     addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
     addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
 
@@ -1678,10 +1678,10 @@ X86TargetLowering::X86TargetLowering(con
 
     setTruncStoreAction(MVT::v32i16,  MVT::v32i8, Legal);
     setTruncStoreAction(MVT::v16i16,  MVT::v16i8, Legal);
-    if (Subtarget->hasVLX())
+    if (Subtarget.hasVLX())
       setTruncStoreAction(MVT::v8i16,   MVT::v8i8,  Legal);
 
-    if (Subtarget->hasCDI()) {
+    if (Subtarget.hasCDI()) {
       setOperationAction(ISD::CTLZ,            MVT::v32i16, Custom);
       setOperationAction(ISD::CTLZ,            MVT::v64i8,  Custom);
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand);
@@ -1704,7 +1704,7 @@ X86TargetLowering::X86TargetLowering(con
     }
   }
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
     addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
     addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
 
@@ -1744,7 +1744,7 @@ X86TargetLowering::X86TargetLowering(con
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
-  if (!Subtarget->is64Bit()) {
+  if (!Subtarget.is64Bit()) {
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
   }
@@ -1756,7 +1756,7 @@ X86TargetLowering::X86TargetLowering(con
   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
   // than generic legalization for 64-bit multiplication-with-overflow, though.
   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
-    if (VT == MVT::i64 && !Subtarget->is64Bit())
+    if (VT == MVT::i64 && !Subtarget.is64Bit())
       continue;
     // Add/Sub/Mul with overflow operations are custom lowered.
     setOperationAction(ISD::SADDO, VT, Custom);
@@ -1767,7 +1767,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::UMULO, VT, Custom);
   }
 
-  if (!Subtarget->is64Bit()) {
+  if (!Subtarget.is64Bit()) {
     // These libcalls are not available in 32-bit.
     setLibcallName(RTLIB::SHL_I128, nullptr);
     setLibcallName(RTLIB::SRL_I128, nullptr);
@@ -1775,10 +1775,10 @@ X86TargetLowering::X86TargetLowering(con
   }
 
   // Combine sin / cos into one node or libcall if possible.
-  if (Subtarget->hasSinCos()) {
+  if (Subtarget.hasSinCos()) {
     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
     setLibcallName(RTLIB::SINCOS_F64, "sincos");
-    if (Subtarget->isTargetDarwin()) {
+    if (Subtarget.isTargetDarwin()) {
       // For MacOSX, we don't want the normal expansion of a libcall to sincos.
       // We want to issue a libcall to __sincos_stret to avoid memory traffic.
       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -1786,7 +1786,7 @@ X86TargetLowering::X86TargetLowering(con
     }
   }
 
-  if (Subtarget->isTargetWin64()) {
+  if (Subtarget.isTargetWin64()) {
     setOperationAction(ISD::SDIV, MVT::i128, Custom);
     setOperationAction(ISD::UDIV, MVT::i128, Custom);
     setOperationAction(ISD::SREM, MVT::i128, Custom);
@@ -1832,7 +1832,7 @@ X86TargetLowering::X86TargetLowering(con
   setTargetDAGCombine(ISD::MSCATTER);
   setTargetDAGCombine(ISD::MGATHER);
 
-  computeRegisterProperties(Subtarget->getRegisterInfo());
+  computeRegisterProperties(Subtarget.getRegisterInfo());
 
   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
   MaxStoresPerMemsetOptSize = 8;
@@ -1844,7 +1844,7 @@ X86TargetLowering::X86TargetLowering(con
 
   // A predictable cmov does not hurt on an in-order CPU.
   // FIXME: Use a CPU attribute to trigger this, not a CPU model.
-  PredictableSelectIsExpensive = !Subtarget->isAtom();
+  PredictableSelectIsExpensive = !Subtarget.isAtom();
   EnableExtLdPromotion = true;
   setPrefFunctionAlignment(4); // 2^4 bytes.
 
@@ -1853,7 +1853,7 @@ X86TargetLowering::X86TargetLowering(con
 
 // This has so far only been implemented for 64-bit MachO.
 bool X86TargetLowering::useLoadStackGuardNode() const {
-  return Subtarget->isTargetMachO() && Subtarget->is64Bit();
+  return Subtarget.isTargetMachO() && Subtarget.is64Bit();
 }
 
 TargetLoweringBase::LegalizeTypeAction
@@ -1869,21 +1869,21 @@ X86TargetLowering::getPreferredVectorAct
 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
   if (!VT.isVector())
-    return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
+    return Subtarget.hasAVX512() ? MVT::i1: MVT::i8;
 
   if (VT.isSimple()) {
     MVT VVT = VT.getSimpleVT();
     const unsigned NumElts = VVT.getVectorNumElements();
     const MVT EltVT = VVT.getVectorElementType();
     if (VVT.is512BitVector()) {
-      if (Subtarget->hasAVX512())
+      if (Subtarget.hasAVX512())
         if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
             EltVT == MVT::f32 || EltVT == MVT::f64)
           switch(NumElts) {
           case  8: return MVT::v8i1;
           case 16: return MVT::v16i1;
         }
-      if (Subtarget->hasBWI())
+      if (Subtarget.hasBWI())
         if (EltVT == MVT::i8 || EltVT == MVT::i16)
           switch(NumElts) {
           case 32: return MVT::v32i1;
@@ -1892,7 +1892,7 @@ EVT X86TargetLowering::getSetCCResultTyp
     }
 
     if (VVT.is256BitVector() || VVT.is128BitVector()) {
-      if (Subtarget->hasVLX())
+      if (Subtarget.hasVLX())
         if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
             EltVT == MVT::f32 || EltVT == MVT::f64)
           switch(NumElts) {
@@ -1900,7 +1900,7 @@ EVT X86TargetLowering::getSetCCResultTyp
           case 4: return MVT::v4i1;
           case 8: return MVT::v8i1;
         }
-      if (Subtarget->hasBWI() && Subtarget->hasVLX())
+      if (Subtarget.hasBWI() && Subtarget.hasVLX())
         if (EltVT == MVT::i8 || EltVT == MVT::i16)
           switch(NumElts) {
           case  8: return MVT::v8i1;
@@ -1944,7 +1944,7 @@ static void getMaxByValAlign(Type *Ty, u
 /// are at 4-byte boundaries.
 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
                                                   const DataLayout &DL) const {
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     // Max of 8 and alignment of type.
     unsigned TyAlign = DL.getABITypeAlignment(Ty);
     if (TyAlign > 8)
@@ -1953,7 +1953,7 @@ unsigned X86TargetLowering::getByValType
   }
 
   unsigned Align = 4;
-  if (Subtarget->hasSSE1())
+  if (Subtarget.hasSSE1())
     getMaxByValAlign(Ty, Align);
   return Align;
 }
@@ -1979,23 +1979,23 @@ X86TargetLowering::getOptimalMemOpType(u
   if ((!IsMemset || ZeroMemset) &&
       !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
     if (Size >= 16 &&
-        (!Subtarget->isUnalignedMem16Slow() ||
+        (!Subtarget.isUnalignedMem16Slow() ||
          ((DstAlign == 0 || DstAlign >= 16) &&
           (SrcAlign == 0 || SrcAlign >= 16)))) {
       if (Size >= 32) {
         // FIXME: Check if unaligned 32-byte accesses are slow.
-        if (Subtarget->hasInt256())
+        if (Subtarget.hasInt256())
           return MVT::v8i32;
-        if (Subtarget->hasFp256())
+        if (Subtarget.hasFp256())
           return MVT::v8f32;
       }
-      if (Subtarget->hasSSE2())
+      if (Subtarget.hasSSE2())
         return MVT::v4i32;
-      if (Subtarget->hasSSE1())
+      if (Subtarget.hasSSE1())
         return MVT::v4f32;
     } else if (!MemcpyStrSrc && Size >= 8 &&
-               !Subtarget->is64Bit() &&
-               Subtarget->hasSSE2()) {
+               !Subtarget.is64Bit() &&
+               Subtarget.hasSSE2()) {
       // Do not use f64 to lower memcpy if source is string constant. It's
       // better to use i32 to avoid the loads.
       return MVT::f64;
@@ -2004,7 +2004,7 @@ X86TargetLowering::getOptimalMemOpType(u
   // This is a compromise. If we reach here, unaligned accesses may be slow on
   // this target. However, creating smaller, aligned accesses could be even
   // slower and would certainly be a lot more code.
-  if (Subtarget->is64Bit() && Size >= 8)
+  if (Subtarget.is64Bit() && Size >= 8)
     return MVT::i64;
   return MVT::i32;
 }
@@ -2029,10 +2029,10 @@ X86TargetLowering::allowsMisalignedMemor
       *Fast = true;
       break;
     case 128:
-      *Fast = !Subtarget->isUnalignedMem16Slow();
+      *Fast = !Subtarget.isUnalignedMem16Slow();
       break;
     case 256:
-      *Fast = !Subtarget->isUnalignedMem32Slow();
+      *Fast = !Subtarget.isUnalignedMem32Slow();
       break;
     // TODO: What about AVX-512 (512-bit) accesses?
     }
@@ -2048,7 +2048,7 @@ unsigned X86TargetLowering::getJumpTable
   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
   // symbol.
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      Subtarget->isPICStyleGOT())
+      Subtarget.isPICStyleGOT())
     return MachineJumpTableInfo::EK_Custom32;
 
   // Otherwise, use the normal jump table encoding heuristics.
@@ -2056,7 +2056,7 @@ unsigned X86TargetLowering::getJumpTable
 }
 
 bool X86TargetLowering::useSoftFloat() const {
-  return Subtarget->useSoftFloat();
+  return Subtarget.useSoftFloat();
 }
 
 const MCExpr *
@@ -2064,7 +2064,7 @@ X86TargetLowering::LowerCustomJumpTableE
                                              const MachineBasicBlock *MBB,
                                              unsigned uid,MCContext &Ctx) const{
   assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
-         Subtarget->isPICStyleGOT());
+         Subtarget.isPICStyleGOT());
   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
   // entries.
   return MCSymbolRefExpr::create(MBB->getSymbol(),
@@ -2074,7 +2074,7 @@ X86TargetLowering::LowerCustomJumpTableE
 /// Returns relocation base for the given PIC jumptable.
 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                     SelectionDAG &DAG) const {
-  if (!Subtarget->is64Bit())
+  if (!Subtarget.is64Bit())
     // This doesn't have SDLoc associated with it, but is not really the
     // same as a Register.
     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
@@ -2088,7 +2088,7 @@ const MCExpr *X86TargetLowering::
 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                              MCContext &Ctx) const {
   // X86-64 uses RIP relative addressing based on the jump table label.
-  if (Subtarget->isPICStyleRIPRel())
+  if (Subtarget.isPICStyleRIPRel())
     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
 
   // Otherwise, the reference is relative to the PIC base.
@@ -2104,7 +2104,7 @@ X86TargetLowering::findRepresentativeCla
   default:
     return TargetLowering::findRepresentativeClass(TRI, VT);
   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
-    RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
+    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
     break;
   case MVT::x86mmx:
     RRC = &X86::VR64RegClass;
@@ -2122,10 +2122,10 @@ X86TargetLowering::findRepresentativeCla
 
 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
                                                unsigned &Offset) const {
-  if (!Subtarget->isTargetLinux())
+  if (!Subtarget.isTargetLinux())
     return false;
 
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
     Offset = 0x28;
     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
@@ -2141,14 +2141,14 @@ bool X86TargetLowering::getStackCookieLo
 }
 
 Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
-  if (!Subtarget->isTargetAndroid())
+  if (!Subtarget.isTargetAndroid())
     return TargetLowering::getSafeStackPointerLocation(IRB);
 
   // Android provides a fixed TLS slot for the SafeStack pointer. See the
   // definition of TLS_SLOT_SAFESTACK in
   // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
   unsigned AddressSpace, Offset;
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
     Offset = 0x48;
     if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
@@ -2243,14 +2243,14 @@ X86TargetLowering::LowerReturn(SDValue C
     // or SSE or MMX vectors.
     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
-          (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+          (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
       report_fatal_error("SSE register return with SSE disabled");
     }
     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
     // llvm-gcc has never done it right and no one has noticed, so this
     // should be OK for now.
     if (ValVT == MVT::f64 &&
-        (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+        (Subtarget.is64Bit() && !Subtarget.hasSSE2()))
       report_fatal_error("SSE2 register return with SSE2 disabled");
 
     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
@@ -2268,7 +2268,7 @@ X86TargetLowering::LowerReturn(SDValue C
 
     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
     // which is returned in RAX / RDX.
-    if (Subtarget->is64Bit()) {
+    if (Subtarget.is64Bit()) {
       if (ValVT == MVT::x86mmx) {
         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
           ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
@@ -2276,7 +2276,7 @@ X86TargetLowering::LowerReturn(SDValue C
                                   ValToCopy);
           // If we don't have SSE2 available, convert to v4f32 so the generated
           // register is legal.
-          if (!Subtarget->hasSSE2())
+          if (!Subtarget.hasSSE2())
             ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
         }
       }
@@ -2301,7 +2301,7 @@ X86TargetLowering::LowerReturn(SDValue C
                                      getPointerTy(MF.getDataLayout()));
 
     unsigned RetValReg
-        = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
+        = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
           X86::RAX : X86::EAX;
     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
     Flag = Chain.getValue(1);
@@ -2311,7 +2311,7 @@ X86TargetLowering::LowerReturn(SDValue C
         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
   }
 
-  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
   const MCPhysReg *I =
       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
   if (I) {
@@ -2379,7 +2379,7 @@ X86TargetLowering::getTypeForExtArgOrRet
                                             ISD::NodeType ExtendKind) const {
   MVT ReturnMVT;
   // TODO: Is this also valid on 32-bit?
-  if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+  if (Subtarget.is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
     ReturnMVT = MVT::i8;
   else
     ReturnMVT = MVT::i32;
@@ -2400,7 +2400,7 @@ X86TargetLowering::LowerCallResult(SDVal
 
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
-  bool Is64Bit = Subtarget->is64Bit();
+  bool Is64Bit = Subtarget.is64Bit();
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                  *DAG.getContext());
   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
@@ -2412,7 +2412,7 @@ X86TargetLowering::LowerCallResult(SDVal
 
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
-        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget.hasSSE1())) {
       report_fatal_error("SSE register return with SSE disabled");
     }
 
@@ -2618,10 +2618,10 @@ X86TargetLowering::LowerMemArgument(SDVa
 
 // FIXME: Get this from tablegen.
 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
-                                                const X86Subtarget *Subtarget) {
-  assert(Subtarget->is64Bit());
+                                                const X86Subtarget &Subtarget) {
+  assert(Subtarget.is64Bit());
 
-  if (Subtarget->isCallingConvWin64(CallConv)) {
+  if (Subtarget.isCallingConvWin64(CallConv)) {
     static const MCPhysReg GPR64ArgRegsWin64[] = {
       X86::RCX, X86::RDX, X86::R8,  X86::R9
     };
@@ -2637,9 +2637,9 @@ static ArrayRef<MCPhysReg> get64BitArgum
 // FIXME: Get this from tablegen.
 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
                                                 CallingConv::ID CallConv,
-                                                const X86Subtarget *Subtarget) {
-  assert(Subtarget->is64Bit());
-  if (Subtarget->isCallingConvWin64(CallConv)) {
+                                                const X86Subtarget &Subtarget) {
+  assert(Subtarget.is64Bit());
+  if (Subtarget.isCallingConvWin64(CallConv)) {
     // The XMM registers which might contain var arg parameters are shadowed
     // in their paired GPR.  So we only need to save the GPR to their home
     // slots.
@@ -2649,10 +2649,10 @@ static ArrayRef<MCPhysReg> get64BitArgum
 
   const Function *Fn = MF.getFunction();
   bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
-  bool isSoftFloat = Subtarget->useSoftFloat();
+  bool isSoftFloat = Subtarget.useSoftFloat();
   assert(!(isSoftFloat && NoImplicitFloatOps) &&
          "SSE register cannot be used when SSE is disabled!");
-  if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
+  if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
     // registers.
     return None;
@@ -2670,17 +2670,17 @@ SDValue X86TargetLowering::LowerFormalAr
     SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+  const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
 
   const Function* Fn = MF.getFunction();
   if (Fn->hasExternalLinkage() &&
-      Subtarget->isTargetCygMing() &&
+      Subtarget.isTargetCygMing() &&
       Fn->getName() == "main")
     FuncInfo->setForceFramePointer(true);
 
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool Is64Bit = Subtarget->is64Bit();
-  bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
+  bool Is64Bit = Subtarget.is64Bit();
+  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
 
   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
          "Var args not supported with calling convention fastcc, ghc or hipe");
@@ -2818,7 +2818,7 @@ SDValue X86TargetLowering::LowerFormalAr
   }
 
   // Figure out if XMM registers are in use.
-  assert(!(Subtarget->useSoftFloat() &&
+  assert(!(Subtarget.useSoftFloat() &&
            Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
          "SSE register cannot be used when SSE is disabled!");
 
@@ -2830,7 +2830,7 @@ SDValue X86TargetLowering::LowerFormalAr
     ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
-    assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+    assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
            "SSE register cannot be used when SSE is disabled!");
 
     // Gather all the live in physical registers.
@@ -2912,13 +2912,13 @@ SDValue X86TargetLowering::LowerFormalAr
     // Find the largest legal vector type.
     MVT VecVT = MVT::Other;
     // FIXME: Only some x86_32 calling conventions support AVX512.
-    if (Subtarget->hasAVX512() &&
+    if (Subtarget.hasAVX512() &&
         (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
                      CallConv == CallingConv::Intel_OCL_BI)))
       VecVT = MVT::v16f32;
-    else if (Subtarget->hasAVX())
+    else if (Subtarget.hasAVX())
       VecVT = MVT::v8f32;
-    else if (Subtarget->hasSSE2())
+    else if (Subtarget.hasSSE2())
       VecVT = MVT::v4f32;
 
     // We forward some GPRs and some vector types.
@@ -2959,8 +2959,8 @@ SDValue X86TargetLowering::LowerFormalAr
     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
     // If this is an sret function, the return should pop the hidden pointer.
     if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
-        !Subtarget->getTargetTriple().isOSMSVCRT() &&
-        argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
+        !Subtarget.getTargetTriple().isOSMSVCRT() &&
+        argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
       FuncInfo->setBytesToPopOnReturn(4);
   }
 
@@ -3078,9 +3078,9 @@ X86TargetLowering::LowerCall(TargetLower
   bool isVarArg                         = CLI.IsVarArg;
 
   MachineFunction &MF = DAG.getMachineFunction();
-  bool Is64Bit        = Subtarget->is64Bit();
-  bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
-  StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
+  bool Is64Bit        = Subtarget.is64Bit();
+  bool IsWin64        = Subtarget.isCallingConvWin64(CallConv);
+  StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
   bool IsSibcall      = false;
   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
   auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
@@ -3091,7 +3091,7 @@ X86TargetLowering::LowerCall(TargetLower
   if (Attr.getValueAsString() == "true")
     isTailCall = false;
 
-  if (Subtarget->isPICStyleGOT() &&
+  if (Subtarget.isPICStyleGOT() &&
       !MF.getTarget().Options.GuaranteedTailCallOpt) {
     // If we are using a GOT, disable tail calls to external symbols with
     // default visibility. Tail calling such a symbol requires using a GOT
@@ -3194,7 +3194,7 @@ X86TargetLowering::LowerCall(TargetLower
 
   // Walk the register/memloc assignments, inserting copies/loads.  In the case
   // of tail call optimization arguments are handle later.
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     // Skip inalloca arguments, they have already been written.
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -3272,7 +3272,7 @@ X86TargetLowering::LowerCall(TargetLower
   if (!MemOpChains.empty())
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
 
-  if (Subtarget->isPICStyleGOT()) {
+  if (Subtarget.isPICStyleGOT()) {
     // ELF / PIC requires GOT in the EBX register before function calls via PLT
     // GOT pointer.
     if (!isTailCall) {
@@ -3313,7 +3313,7 @@ X86TargetLowering::LowerCall(TargetLower
       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
     };
     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
-    assert((Subtarget->hasSSE1() || !NumXMMRegs)
+    assert((Subtarget.hasSSE1() || !NumXMMRegs)
            && "SSE registers cannot be used when SSE is disabled");
 
     RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
@@ -3423,19 +3423,19 @@ X86TargetLowering::LowerCall(TargetLower
       // external symbols most go through the PLT in PIC mode.  If the symbol
       // has hidden or protected visibility, or if it is static or local, then
       // we don't need to use the PLT - we can directly call it.
-      if (Subtarget->isTargetELF() &&
+      if (Subtarget.isTargetELF() &&
           DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
         OpFlags = X86II::MO_PLT;
-      } else if (Subtarget->isPICStyleStubAny() &&
+      } else if (Subtarget.isPICStyleStubAny() &&
                  !GV->isStrongDefinitionForLinker() &&
-                 (!Subtarget->getTargetTriple().isMacOSX() ||
-                  Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+                 (!Subtarget.getTargetTriple().isMacOSX() ||
+                  Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
         // PC-relative references to external symbols should go through $stub,
         // unless we're building with the leopard linker or later, which
         // automatically synthesizes these stubs.
         OpFlags = X86II::MO_DARWIN_STUB;
-      } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
+      } else if (Subtarget.isPICStyleRIPRel() && isa<Function>(GV) &&
                  cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
         // If the function is marked as non-lazy, generate an indirect call
         // which loads from the GOT directly. This avoids runtime overhead
@@ -3464,12 +3464,12 @@ X86TargetLowering::LowerCall(TargetLower
 
     // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
     // external symbols should go through the PLT.
-    if (Subtarget->isTargetELF() &&
+    if (Subtarget.isTargetELF() &&
         DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
       OpFlags = X86II::MO_PLT;
-    } else if (Subtarget->isPICStyleStubAny() &&
-               (!Subtarget->getTargetTriple().isMacOSX() ||
-                Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+    } else if (Subtarget.isPICStyleStubAny() &&
+               (!Subtarget.getTargetTriple().isMacOSX() ||
+                Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
@@ -3478,7 +3478,7 @@ X86TargetLowering::LowerCall(TargetLower
 
     Callee = DAG.getTargetExternalSymbol(
         S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
-  } else if (Subtarget->isTarget64BitILP32() &&
+  } else if (Subtarget.isTarget64BitILP32() &&
              Callee->getValueType(0) == MVT::i32) {
     // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
     Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
@@ -3551,7 +3551,7 @@ X86TargetLowering::LowerCall(TargetLower
                        DAG.getTarget().Options.GuaranteedTailCallOpt))
     NumBytesForCalleeToPop = NumBytes;    // Callee pops everything
   else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
-           !Subtarget->getTargetTriple().isOSMSVCRT() &&
+           !Subtarget.getTargetTriple().isOSMSVCRT() &&
            SR == StackStructReturn)
     // If this is a call to a struct-return function, the callee
     // pops the hidden struct pointer, so we have to push it back.
@@ -3613,8 +3613,8 @@ X86TargetLowering::LowerCall(TargetLower
 unsigned
 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                SelectionDAG& DAG) const {
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
-  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
   unsigned StackAlignment = TFI.getStackAlignment();
   uint64_t AlignMask = StackAlignment - 1;
   int64_t Offset = StackSize;
@@ -3707,8 +3707,8 @@ bool X86TargetLowering::IsEligibleForTai
 
   CallingConv::ID CallerCC = CallerF->getCallingConv();
   bool CCMatch = CallerCC == CalleeCC;
-  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
-  bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
+  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
+  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
 
   // Win64 functions have extra shadow space for argument homing. Don't do the
   // sibcall if the caller and callee have mismatched expectations for this
@@ -3727,7 +3727,7 @@ bool X86TargetLowering::IsEligibleForTai
 
   // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
   // emit a special epilogue.
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   if (RegInfo->needsStackRealignment(MF))
     return false;
 
@@ -3829,7 +3829,7 @@ bool X86TargetLowering::IsEligibleForTai
       // the caller's fixed stack objects.
       MachineFrameInfo *MFI = MF.getFrameInfo();
       const MachineRegisterInfo *MRI = &MF.getRegInfo();
-      const X86InstrInfo *TII = Subtarget->getInstrInfo();
+      const X86InstrInfo *TII = Subtarget.getInstrInfo();
       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
         CCValAssign &VA = ArgLocs[i];
         SDValue Arg = OutVals[i];
@@ -3849,7 +3849,7 @@ bool X86TargetLowering::IsEligibleForTai
     // only target EAX, EDX, or ECX since the tail call must be scheduled after
     // callee-saved registers are restored. These happen to be the same
     // registers used to pass 'inreg' arguments so watch out for those.
-    if (!Subtarget->is64Bit() &&
+    if (!Subtarget.is64Bit() &&
         ((!isa<GlobalAddressSDNode>(Callee) &&
           !isa<ExternalSymbolSDNode>(Callee)) ||
          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
@@ -3876,7 +3876,7 @@ bool X86TargetLowering::IsEligibleForTai
   }
 
   bool CalleeWillPop =
-      X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg,
+      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                        MF.getTarget().Options.GuaranteedTailCallOpt);
 
   if (unsigned BytesToPop =
@@ -3978,7 +3978,7 @@ static SDValue getTargetShuffleNode(unsi
 
 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
   int ReturnAddrIndex = FuncInfo->getRAIndex();
 
@@ -4289,12 +4289,12 @@ bool X86TargetLowering::isExtractSubvect
 
 bool X86TargetLowering::isCheapToSpeculateCttz() const {
   // Speculate cttz only if we can directly use TZCNT.
-  return Subtarget->hasBMI();
+  return Subtarget.hasBMI();
 }
 
 bool X86TargetLowering::isCheapToSpeculateCtlz() const {
   // Speculate ctlz only if we can directly use LZCNT.
-  return Subtarget->hasLZCNT();
+  return Subtarget.hasLZCNT();
 }
 
 /// Return true if every element in Mask, beginning
@@ -4474,7 +4474,7 @@ static SDValue getConstVector(ArrayRef<i
 }
 
 /// Returns a vector of specified type with all zero elements.
-static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget,
+static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG, SDLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
 
@@ -4482,7 +4482,7 @@ static SDValue getZeroVector(MVT VT, con
   // to their dest type. This ensures they get CSE'd.
   SDValue Vec;
   if (VT.is128BitVector()) {  // SSE
-    if (Subtarget->hasSSE2()) {  // SSE2
+    if (Subtarget.hasSSE2()) {  // SSE2
       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
     } else { // SSE1
@@ -4490,7 +4490,7 @@ static SDValue getZeroVector(MVT VT, con
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
     }
   } else if (VT.is256BitVector()) { // AVX
-    if (Subtarget->hasInt256()) { // AVX2
+    if (Subtarget.hasInt256()) { // AVX2
       SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
@@ -4508,9 +4508,9 @@ static SDValue getZeroVector(MVT VT, con
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
   } else if (VT.getVectorElementType() == MVT::i1) {
 
-    assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
+    assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16)
             && "Unexpected vector type");
-    assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8)
+    assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8)
             && "Unexpected vector type");
     SDValue Cst = DAG.getConstant(0, dl, MVT::i1);
     SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst);
@@ -4756,7 +4756,7 @@ static SDValue Concat256BitVectors(SDVal
 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
 /// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
 /// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget,
+static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG, SDLoc dl) {
   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
          "Expected a 128/256/512-bit vector type");
@@ -4764,7 +4764,7 @@ static SDValue getOnesVector(EVT VT, con
   APInt Ones = APInt::getAllOnesValue(32);
   unsigned NumElts = VT.getSizeInBits() / 32;
   SDValue Vec;
-  if (!Subtarget->hasInt256() && NumElts == 8) {
+  if (!Subtarget.hasInt256() && NumElts == 8) {
     Vec = DAG.getConstant(Ones, dl, MVT::v4i32);
     Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
   } else {
@@ -4803,7 +4803,7 @@ static SDValue getUnpackh(SelectionDAG &
 /// This produces a shuffle mask like 4,1,2,3 (idx=0) or  0,1,2,4 (idx=3).
 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
                                            bool IsZero,
-                                           const X86Subtarget *Subtarget,
+                                           const X86Subtarget &Subtarget,
                                            SelectionDAG &DAG) {
   MVT VT = V2.getSimpleValueType();
   SDValue V1 = IsZero
@@ -5180,7 +5180,7 @@ static SDValue getShuffleScalarElt(SDNod
 static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
                                        unsigned NumNonZero, unsigned NumZero,
                                        SelectionDAG &DAG,
-                                       const X86Subtarget* Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        const TargetLowering &TLI) {
   if (NumNonZero > 8)
     return SDValue();
@@ -5190,7 +5190,7 @@ static SDValue LowerBuildVectorv16i8(SDV
   bool First = true;
 
   // SSE4.1 - use PINSRB to insert each byte directly.
-  if (Subtarget->hasSSE41()) {
+  if (Subtarget.hasSSE41()) {
     for (unsigned i = 0; i < 16; ++i) {
       bool isNonZero = (NonZeros & (1 << i)) != 0;
       if (isNonZero) {
@@ -5250,7 +5250,7 @@ static SDValue LowerBuildVectorv16i8(SDV
 static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
                                      unsigned NumNonZero, unsigned NumZero,
                                      SelectionDAG &DAG,
-                                     const X86Subtarget* Subtarget,
+                                     const X86Subtarget &Subtarget,
                                      const TargetLowering &TLI) {
   if (NumNonZero > 4)
     return SDValue();
@@ -5279,7 +5279,7 @@ static SDValue LowerBuildVectorv8i16(SDV
 
 /// Custom lower build_vector of v4i32 or v4f32.
 static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
-                                     const X86Subtarget *Subtarget,
+                                     const X86Subtarget &Subtarget,
                                      const TargetLowering &TLI) {
   // Find all zeroable elements.
   std::bitset<4> Zeroable;
@@ -5343,7 +5343,7 @@ static SDValue LowerBuildVectorv4x32(SDV
   }
 
   // See if we can lower this build_vector to a INSERTPS.
-  if (!Subtarget->hasSSE41())
+  if (!Subtarget.hasSSE41())
     return SDValue();
 
   SDValue V2 = Elt.getOperand(0);
@@ -5624,12 +5624,12 @@ static SDValue EltsFromConsecutiveLoads(
 /// a scalar load, or a constant.
 /// The VBROADCAST node is returned when a pattern is found,
 /// or SDValue() otherwise.
-static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
+static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
   // VBROADCAST requires AVX.
   // TODO: Splats could be generated for non-AVX CPUs using SSE
   // instructions, but there's less potential gain for only 128-bit vectors.
-  if (!Subtarget->hasAVX())
+  if (!Subtarget.hasAVX())
     return SDValue();
 
   MVT VT = Op.getSimpleValueType();
@@ -5679,7 +5679,7 @@ static SDValue LowerVectorBroadcast(SDVa
       if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
           Sc.getOpcode() != ISD::BUILD_VECTOR) {
 
-        if (!Subtarget->hasInt256())
+        if (!Subtarget.hasInt256())
           return SDValue();
 
         // Use the register form of the broadcast instruction available on AVX2.
@@ -5697,7 +5697,7 @@ static SDValue LowerVectorBroadcast(SDVa
       // Constants may have multiple users.
 
       // AVX-512 has register version of the broadcast
-      bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
+      bool hasRegVer = Subtarget.hasAVX512() && VT.is512BitVector() &&
         Ld.getValueType().getSizeInBits() >= 32;
       if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
           !hasRegVer))
@@ -5722,7 +5722,7 @@ static SDValue LowerVectorBroadcast(SDVa
   // from the constant pool and not to broadcast it from a scalar.
   // But override that restriction when optimizing for size.
   // TODO: Check if splatting is recommended for other AVX-capable CPUs.
-  if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) {
+  if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
     EVT CVT = Ld.getValueType();
     assert(!CVT.isVector() && "Must not broadcast a vector type");
 
@@ -5731,7 +5731,7 @@ static SDValue LowerVectorBroadcast(SDVa
     // with AVX2, also splat i8 and i16.
     // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
     if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
-        (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) {
+        (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
       const Constant *C = nullptr;
       if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
         C = CI->getConstantIntValue();
@@ -5756,7 +5756,7 @@ static SDValue LowerVectorBroadcast(SDVa
   bool IsLoad = ISD::isNormalLoad(Ld.getNode());
 
   // Handle AVX2 in-register broadcasts.
-  if (!IsLoad && Subtarget->hasInt256() &&
+  if (!IsLoad && Subtarget.hasInt256() &&
       (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
 
@@ -5765,12 +5765,12 @@ static SDValue LowerVectorBroadcast(SDVa
     return SDValue();
 
   if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
-      (Subtarget->hasVLX() && ScalarSize == 64))
+      (Subtarget.hasVLX() && ScalarSize == 64))
     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
 
   // The integer check is needed for the 64-bit into 128-bit so it doesn't match
   // double since there is no vbroadcastsd xmm
-  if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
+  if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
     if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
   }
@@ -6156,10 +6156,10 @@ static SDValue ExpandHorizontalBinOp(con
 /// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB
 /// node.
 static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
-                             const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+                             const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   MVT VT = BV->getSimpleValueType(0);
-  if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
-      (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
+  if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
+      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
     return SDValue();
 
   SDLoc DL(BV);
@@ -6258,7 +6258,7 @@ static SDValue LowerToAddSub(const Build
 
 /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
 static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
-                                   const X86Subtarget *Subtarget,
+                                   const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
   MVT VT = BV->getSimpleValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -6282,14 +6282,14 @@ static SDValue LowerToHorizontalOp(const
 
   SDLoc DL(BV);
   SDValue InVec0, InVec1;
-  if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
+  if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
     // Try to match an SSE3 float HADD/HSUB.
     if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
       return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
 
     if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
       return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
-  } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
+  } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
     // Try to match an SSSE3 integer HADD/HSUB.
     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
       return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
@@ -6298,7 +6298,7 @@ static SDValue LowerToHorizontalOp(const
       return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
   }
 
-  if (!Subtarget->hasAVX())
+  if (!Subtarget.hasAVX())
     return SDValue();
 
   if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
@@ -6346,7 +6346,7 @@ static SDValue LowerToHorizontalOp(const
     if (CanFold) {
       // Fold this build_vector into a single horizontal add/sub.
       // Do this only if the target has AVX2.
-      if (Subtarget->hasAVX2())
+      if (Subtarget.hasAVX2())
         return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
 
       // Do not try to expand this build_vector into a pair of horizontal
@@ -6364,7 +6364,7 @@ static SDValue LowerToHorizontalOp(const
   }
 
   if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
-       VT == MVT::v16i16) && Subtarget->hasAVX()) {
+       VT == MVT::v16i16) && Subtarget.hasAVX()) {
     unsigned X86Opcode;
     if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
       X86Opcode = X86ISD::HADD;
@@ -6408,7 +6408,7 @@ static SDValue materializeVectorConstant
     if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
       return Op;
 
-    return getZeroVector(VT, &Subtarget, DAG, DL);
+    return getZeroVector(VT, Subtarget, DAG, DL);
   }
 
   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
@@ -6419,7 +6419,7 @@ static SDValue materializeVectorConstant
       return Op;
 
     if (!VT.is512BitVector())
-      return getOnesVector(VT, &Subtarget, DAG, DL);
+      return getOnesVector(VT, Subtarget, DAG, DL);
   }
 
   return SDValue();
@@ -6434,10 +6434,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
   unsigned NumElems = Op.getNumOperands();
 
   // Generate vectors for predicate vectors.
-  if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512())
+  if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
     return LowerBUILD_VECTORvXi1(Op, DAG);
 
-  if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, *Subtarget))
+  if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
     return VectorConstant;
 
   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
@@ -6486,7 +6486,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
     // insertion that way.  Only do this if the value is non-constant or if the
     // value is a constant being inserted into element 0.  It is cheaper to do
     // a constant pool load than it is to do a movd + shuffle.
-    if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
+    if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
         (!IsAllConstants || Idx == 0)) {
       if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
         // Handle SSE only.
@@ -6511,7 +6511,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
         return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
 
       if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
-          (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
+          (ExtVT == MVT::i64 && Subtarget.is64Bit())) {
         if (VT.is512BitVector()) {
           SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
           return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
@@ -6529,7 +6529,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
       if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
         Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
         if (VT.is256BitVector()) {
-          if (Subtarget->hasAVX()) {
+          if (Subtarget.hasAVX()) {
             Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item);
             Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
           } else {
@@ -6697,7 +6697,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
       return Sh;
 
     // For SSE 4.1, use insertps to put the high elements into the low element.
-    if (Subtarget->hasSSE41()) {
+    if (Subtarget.hasSSE41()) {
       SDValue Result;
       if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
         Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -6774,7 +6774,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SD
 }
 
 static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG & DAG) {
   SDLoc dl(Op);
   MVT ResVT = Op.getSimpleValueType();
@@ -6851,7 +6851,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(S
 }
 
 static SDValue LowerCONCAT_VECTORS(SDValue Op,
-                                   const X86Subtarget *Subtarget,
+                                   const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   if (VT.getVectorElementType() == MVT::i1)
@@ -7177,7 +7177,7 @@ static SDValue lowerVectorShuffleAsBitBl
 /// that the shuffle mask is a blend, or convertible into a blend with zero.
 static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
                                          SDValue V2, ArrayRef<int> Original,
-                                         const X86Subtarget *Subtarget,
+                                         const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
   bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
   bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
@@ -7240,13 +7240,13 @@ static SDValue lowerVectorShuffleAsBlend
 
   case MVT::v4i64:
   case MVT::v8i32:
-    assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+    assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
     // FALLTHROUGH
   case MVT::v2i64:
   case MVT::v4i32:
     // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
     // that instruction.
-    if (Subtarget->hasAVX2()) {
+    if (Subtarget.hasAVX2()) {
       // Scale the blend by the number of 32-bit dwords per element.
       int Scale =  VT.getScalarSizeInBits() / 32;
       BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
@@ -7271,7 +7271,7 @@ static SDValue lowerVectorShuffleAsBlend
   }
 
   case MVT::v16i16: {
-    assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+    assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
     SmallVector<int, 8> RepeatedMask;
     if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
       // We can lower these with PBLENDW which is mirrored across 128-bit lanes.
@@ -7287,7 +7287,7 @@ static SDValue lowerVectorShuffleAsBlend
     // FALLTHROUGH
   case MVT::v16i8:
   case MVT::v32i8: {
-    assert((VT.is128BitVector() || Subtarget->hasAVX2()) &&
+    assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
            "256-bit byte-blends require AVX2 support!");
 
     // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
@@ -7425,7 +7425,7 @@ static SDValue lowerVectorShuffleAsDecom
 static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
                                               SDValue V2,
                                               ArrayRef<int> Mask,
-                                              const X86Subtarget *Subtarget,
+                                              const X86Subtarget &Subtarget,
                                               SelectionDAG &DAG) {
   assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
 
@@ -7503,7 +7503,7 @@ static SDValue lowerVectorShuffleAsByteR
   int Scale = 16 / NumLaneElts;
 
   // SSSE3 targets can use the palignr instruction.
-  if (Subtarget->hasSSSE3()) {
+  if (Subtarget.hasSSSE3()) {
     // Cast the inputs to i8 vector of correct length to match PALIGNR.
     MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
     Lo = DAG.getBitcast(AlignVT, Lo);
@@ -7767,7 +7767,7 @@ static SDValue lowerVectorShuffleWithSSE
 /// the same lane.
 static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
     SDLoc DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
-    ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+    ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   assert(Scale > 1 && "Need a scale to extend.");
   int EltBits = VT.getScalarSizeInBits();
   int NumElements = VT.getVectorNumElements();
@@ -7800,7 +7800,7 @@ static SDValue lowerVectorShuffleAsSpeci
 
   // Found a valid zext mask! Try various lowering strategies based on the
   // input type and available ISA extensions.
-  if (Subtarget->hasSSE41()) {
+  if (Subtarget.hasSSE41()) {
     // Not worth offsetting 128-bit vectors if scale == 2, a pattern using
     // PUNPCK will catch this in a later shuffle match.
     if (Offset && Scale == 2 && VT.is128BitVector())
@@ -7839,7 +7839,7 @@ static SDValue lowerVectorShuffleAsSpeci
 
   // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
   // to 64-bits.
-  if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
+  if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
     assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
     assert(VT.is128BitVector() && "Unexpected vector width!");
 
@@ -7865,7 +7865,7 @@ static SDValue lowerVectorShuffleAsSpeci
   // If this would require more than 2 unpack instructions to expand, use
   // pshufb when available. We can only use more than 2 unpack instructions
   // when zero extending i8 elements which also makes it easier to use pshufb.
-  if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) {
+  if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) {
     assert(NumElements == 16 && "Unexpected byte vector width!");
     SDValue PSHUFBMask[16];
     for (int i = 0; i < 16; ++i) {
@@ -7925,7 +7925,7 @@ static SDValue lowerVectorShuffleAsSpeci
 /// are both incredibly common and often quite performance sensitive.
 static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
     SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
-    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
 
   int Bits = VT.getSizeInBits();
@@ -8084,7 +8084,7 @@ static bool isShuffleFoldableLoad(SDValu
 /// across all subtarget feature sets.
 static SDValue lowerVectorShuffleAsElementInsertion(
     SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
-    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
   MVT ExtVT = VT;
   MVT EltVT = VT.getVectorElementType();
@@ -8141,7 +8141,7 @@ static SDValue lowerVectorShuffleAsEleme
     // This is essentially a special case blend operation, but if we have
     // general purpose blend operations, they are always faster. Bail and let
     // the rest of the lowering handle these as blends.
-    if (Subtarget->hasSSE41())
+    if (Subtarget.hasSSE41())
       return SDValue();
 
     // Otherwise, use MOVSD or MOVSS.
@@ -8187,9 +8187,9 @@ static SDValue lowerVectorShuffleAsEleme
 /// This assumes we have AVX2.
 static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0,
                                                   int BroadcastIdx,
-                                                  const X86Subtarget *Subtarget,
+                                                  const X86Subtarget &Subtarget,
                                                   SelectionDAG &DAG) {
-  assert(Subtarget->hasAVX2() &&
+  assert(Subtarget.hasAVX2() &&
          "We can only lower integer broadcasts with AVX2!");
 
   EVT EltVT = VT.getVectorElementType();
@@ -8242,11 +8242,11 @@ static SDValue lowerVectorShuffleAsTrunc
 /// FIXME: This is very similar to LowerVectorBroadcast - can we merge them?
 static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
                                              ArrayRef<int> Mask,
-                                             const X86Subtarget *Subtarget,
+                                             const X86Subtarget &Subtarget,
                                              SelectionDAG &DAG) {
-  if (!Subtarget->hasAVX())
+  if (!Subtarget.hasAVX())
     return SDValue();
-  if (VT.isInteger() && !Subtarget->hasAVX2())
+  if (VT.isInteger() && !Subtarget.hasAVX2())
     return SDValue();
 
   // Check that the mask is a broadcast.
@@ -8317,11 +8317,11 @@ static SDValue lowerVectorShuffleAsBroad
 
     // If the scalar isn't a load, we can't broadcast from it in AVX1.
     // Only AVX2 has register broadcasts.
-    if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
+    if (!Subtarget.hasAVX2() && !isShuffleFoldableLoad(V))
       return SDValue();
   } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
     // 32-bit targets need to load i64 as a f64 and then bitcast the result.
-    if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64)
+    if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64)
       BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
 
     // If we are broadcasting a load that is only used by the shuffle
@@ -8337,7 +8337,7 @@ static SDValue lowerVectorShuffleAsBroad
     V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                     DAG.getMachineFunction().getMachineMemOperand(
                         Ld->getMemOperand(), Offset, SVT.getStoreSize()));
-  } else if (!Subtarget->hasAVX2()) {
+  } else if (!Subtarget.hasAVX2()) {
     // We can't broadcast from a vector register without AVX2.
     return SDValue();
   } else if (BroadcastIdx != 0) {
@@ -8567,7 +8567,7 @@ static SDValue lowerVectorShuffleAsPermu
 /// it is better to avoid lowering through this for integer vectors where
 /// possible.
 static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!");
@@ -8579,7 +8579,7 @@ static SDValue lowerV2F64VectorShuffle(S
 
   if (isSingleInputShuffleMask(Mask)) {
     // Use low duplicate instructions for masks that match their pattern.
-    if (Subtarget->hasSSE3())
+    if (Subtarget.hasSSE3())
       if (isShuffleEquivalent(V1, V2, Mask, {0, 0}))
         return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);
 
@@ -8587,7 +8587,7 @@ static SDValue lowerV2F64VectorShuffle(S
     // single input as both of the "inputs" to this instruction.
     unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
 
-    if (Subtarget->hasAVX()) {
+    if (Subtarget.hasAVX()) {
       // If we have AVX, we can use VPERMILPS which will allow folding a load
       // into the shuffle.
       return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
@@ -8626,7 +8626,7 @@ static SDValue lowerV2F64VectorShuffle(S
           DL, MVT::v2f64, V2,
           DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
 
-  if (Subtarget->hasSSE41())
+  if (Subtarget.hasSSE41())
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
                                                   Subtarget, DAG))
       return Blend;
@@ -8648,7 +8648,7 @@ static SDValue lowerV2F64VectorShuffle(S
 /// it falls back to the floating point shuffle operation with appropriate bit
 /// casting.
 static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!");
@@ -8719,7 +8719,7 @@ static SDValue lowerV2I64VectorShuffle(S
 
   // We have different paths for blend lowering, but they all must use the
   // *exact* same predicate.
-  bool IsBlendSupported = Subtarget->hasSSE41();
+  bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
                                                   Subtarget, DAG))
@@ -8732,7 +8732,7 @@ static SDValue lowerV2I64VectorShuffle(S
 
   // Try to use byte rotation instructions.
   // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
-  if (Subtarget->hasSSSE3())
+  if (Subtarget.hasSSSE3())
     if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
             DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
       return Rotate;
@@ -8867,7 +8867,7 @@ static SDValue lowerVectorShuffleWithSHU
 /// domain crossing penalties, as these are sufficient to implement all v4f32
 /// shuffles.
 static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
@@ -8887,14 +8887,14 @@ static SDValue lowerV4F32VectorShuffle(S
       return Broadcast;
 
     // Use even/odd duplicate instructions for masks that match their pattern.
-    if (Subtarget->hasSSE3()) {
+    if (Subtarget.hasSSE3()) {
       if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
         return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
       if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3}))
         return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
     }
 
-    if (Subtarget->hasAVX()) {
+    if (Subtarget.hasAVX()) {
       // If we have AVX, we can use VPERMILPS which will allow folding a load
       // into the shuffle.
       return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
@@ -8917,7 +8917,7 @@ static SDValue lowerV4F32VectorShuffle(S
                                                          Mask, Subtarget, DAG))
       return V;
 
-  if (Subtarget->hasSSE41()) {
+  if (Subtarget.hasSSE41()) {
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
                                                   Subtarget, DAG))
       return Blend;
@@ -8946,7 +8946,7 @@ static SDValue lowerV4F32VectorShuffle(S
 /// We try to handle these with integer-domain shuffles where we can, but for
 /// blends we use the floating point domain blend instructions.
 static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!");
@@ -9001,7 +9001,7 @@ static SDValue lowerV4I32VectorShuffle(S
 
   // We have different paths for blend lowering, but they all must use the
   // *exact* same predicate.
-  bool IsBlendSupported = Subtarget->hasSSE41();
+  bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
                                                   Subtarget, DAG))
@@ -9018,7 +9018,7 @@ static SDValue lowerV4I32VectorShuffle(S
 
   // Try to use byte rotation instructions.
   // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
-  if (Subtarget->hasSSSE3())
+  if (Subtarget.hasSSSE3())
     if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
             DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
       return Rotate;
@@ -9063,7 +9063,7 @@ static SDValue lowerV4I32VectorShuffle(S
 /// vector, form the analogous 128-bit 8-element Mask.
 static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
     SDLoc DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
-    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
   MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
 
@@ -9580,7 +9580,7 @@ static SDValue lowerVectorShuffleAsPSHUF
 /// halves of the inputs separately (making them have relatively few inputs)
 /// and then concatenate them.
 static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!");
@@ -9641,7 +9641,7 @@ static SDValue lowerV8I16VectorShuffle(S
     return Shift;
 
   // See if we can use SSE4A Extraction / Insertion.
-  if (Subtarget->hasSSE4A())
+  if (Subtarget.hasSSE4A())
     if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG))
       return V;
 
@@ -9653,7 +9653,7 @@ static SDValue lowerV8I16VectorShuffle(S
 
   // We have different paths for blend lowering, but they all must use the
   // *exact* same predicate.
-  bool IsBlendSupported = Subtarget->hasSSE41();
+  bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
     if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
                                                   Subtarget, DAG))
@@ -9683,7 +9683,7 @@ static SDValue lowerV8I16VectorShuffle(S
 
   // If we can't directly blend but can use PSHUFB, that will be better as it
   // can both shuffle and set up the inefficient blend.
-  if (!IsBlendSupported && Subtarget->hasSSSE3()) {
+  if (!IsBlendSupported && Subtarget.hasSSSE3()) {
     bool V1InUse, V2InUse;
     return lowerVectorShuffleAsPSHUFB(DL, MVT::v8i16, V1, V2, Mask, DAG,
                                       V1InUse, V2InUse);
@@ -9771,7 +9771,7 @@ static int canLowerByDroppingEvenElement
 /// the existing lowering for v8i16 blends on each half, finally PACK-ing them
 /// back together.
 static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");
@@ -9797,7 +9797,7 @@ static SDValue lowerV16I8VectorShuffle(S
     return ZExt;
 
   // See if we can use SSE4A Extraction / Insertion.
-  if (Subtarget->hasSSE4A())
+  if (Subtarget.hasSSE4A())
     if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG))
       return V;
 
@@ -9924,7 +9924,7 @@ static SDValue lowerV16I8VectorShuffle(S
   // FIXME: The only exceptions to the above are blends which are exact
   // interleavings with direct instructions supporting them. We currently don't
   // handle those well here.
-  if (Subtarget->hasSSSE3()) {
+  if (Subtarget.hasSSSE3()) {
     bool V1InUse = false;
     bool V2InUse = false;
 
@@ -9935,7 +9935,7 @@ static SDValue lowerV16I8VectorShuffle(S
     // do so. This avoids using them to handle blends-with-zero which is
     // important as a single pshufb is significantly faster for that.
     if (V1InUse && V2InUse) {
-      if (Subtarget->hasSSE41())
+      if (Subtarget.hasSSE41())
         if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2,
                                                       Mask, Subtarget, DAG))
           return Blend;
@@ -10064,7 +10064,7 @@ static SDValue lowerV16I8VectorShuffle(S
 /// This routine breaks down the specific type of 128-bit shuffle and
 /// dispatches to the lowering routines accordingly.
 static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                        MVT VT, const X86Subtarget *Subtarget,
+                                        MVT VT, const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   switch (VT.SimpleTy) {
   case MVT::v2i64:
@@ -10382,7 +10382,7 @@ static SDValue lowerVectorShuffleAsLaneP
 /// \brief Handle lowering 2-lane 128-bit shuffles.
 static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
                                         SDValue V2, ArrayRef<int> Mask,
-                                        const X86Subtarget *Subtarget,
+                                        const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   // TODO: If minimizing size and one of the inputs is a zero vector and the
   // zero vector has only one use, we could use a VPERM2X128 to save the
@@ -10475,7 +10475,7 @@ static SDValue lowerV2X128VectorShuffle(
 /// those are still *marginally* more expensive.
 static SDValue lowerVectorShuffleByMerging128BitLanes(
     SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
-    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   assert(!isSingleInputShuffleMask(Mask) &&
          "This is only useful with multiple inputs.");
 
@@ -10549,7 +10549,7 @@ static SDValue lowerVectorShuffleByMergi
 /// or shuffling smaller vector types which can lower more efficiently.
 static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1,
                                                SDValue V2, ArrayRef<int> Mask,
-                                               const X86Subtarget *Subtarget,
+                                               const X86Subtarget &Subtarget,
                                                SelectionDAG &DAG) {
   assert(VT.is256BitVector() && "Expected 256-bit vector");
 
@@ -10635,7 +10635,7 @@ static SDValue lowerVectorShuffleWithUnd
     return SDValue();
 
   // AVX2 - XXXXuuuu - always extract lowers.
-  if (Subtarget->hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
+  if (Subtarget.hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
     // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
     if (VT == MVT::v4f64 || VT == MVT::v4i64)
       return SDValue();
@@ -10714,7 +10714,7 @@ static SDValue lowerVectorShuffleWithSHU
 /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
 /// isn't available.
 static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
@@ -10748,7 +10748,7 @@ static SDValue lowerV4F64VectorShuffle(S
     }
 
     // With AVX2 we have direct support for this permutation.
-    if (Subtarget->hasAVX2())
+    if (Subtarget.hasAVX2())
       return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
                          getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
 
@@ -10775,7 +10775,7 @@ static SDValue lowerV4F64VectorShuffle(S
   // shuffle. However, if we have AVX2 and either input is already in place,
   // we will be able to shuffle the other input even across lanes in a single
   // instruction, so skip this pattern.
-  if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
+  if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
                                  isShuffleMaskInputInPlace(1, Mask))))
     if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
             DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
@@ -10783,7 +10783,7 @@ static SDValue lowerV4F64VectorShuffle(S
 
   // If we have AVX2 then we always want to lower with a blend because at v4 we
   // can fully permute the elements.
-  if (Subtarget->hasAVX2())
+  if (Subtarget.hasAVX2())
     return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
                                                       Mask, DAG);
 
@@ -10796,7 +10796,7 @@ static SDValue lowerV4F64VectorShuffle(S
 /// This routine is only called when we have AVX2 and thus a reasonable
 /// instruction set for v4i64 shuffling.
 static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
@@ -10804,7 +10804,7 @@ static SDValue lowerV4I64VectorShuffle(S
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
-  assert(Subtarget->hasAVX2() && "We can only lower v4i64 with AVX2!");
+  assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
 
   SmallVector<int, 4> WidenedMask;
   if (canWidenShuffleElements(Mask, WidenedMask))
@@ -10859,7 +10859,7 @@ static SDValue lowerV4I64VectorShuffle(S
   // shuffle. However, if we have AVX2 and either input is already in place,
   // we will be able to shuffle the other input even across lanes in a single
   // instruction, so skip this pattern.
-  if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
+  if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
                                  isShuffleMaskInputInPlace(1, Mask))))
     if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
             DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
@@ -10875,7 +10875,7 @@ static SDValue lowerV4I64VectorShuffle(S
 /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
 /// isn't available.
 static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
@@ -10936,7 +10936,7 @@ static SDValue lowerV8F32VectorShuffle(S
           X86ISD::VPERMILPV, DL, MVT::v8f32, V1,
           DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask));
 
-    if (Subtarget->hasAVX2())
+    if (Subtarget.hasAVX2())
       return DAG.getNode(
           X86ISD::VPERMV, DL, MVT::v8f32,
           DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
@@ -10954,7 +10954,7 @@ static SDValue lowerV8F32VectorShuffle(S
 
   // If we have AVX2 then we always want to lower with a blend because at v8 we
   // can fully permute the elements.
-  if (Subtarget->hasAVX2())
+  if (Subtarget.hasAVX2())
     return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
                                                       Mask, DAG);
 
@@ -10967,7 +10967,7 @@ static SDValue lowerV8F32VectorShuffle(S
 /// This routine is only called when we have AVX2 and thus a reasonable
 /// instruction set for v8i32 shuffling.
 static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
@@ -10975,7 +10975,7 @@ static SDValue lowerV8I32VectorShuffle(S
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
-  assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!");
+  assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");
 
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
@@ -11047,7 +11047,7 @@ static SDValue lowerV8I32VectorShuffle(S
 /// This routine is only called when we have AVX2 and thus a reasonable
 /// instruction set for v16i16 shuffling.
 static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                        const X86Subtarget *Subtarget,
+                                        const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
@@ -11055,7 +11055,7 @@ static SDValue lowerV16I16VectorShuffle(
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
-  assert(Subtarget->hasAVX2() && "We can only lower v16i16 with AVX2!");
+  assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");
 
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
@@ -11138,7 +11138,7 @@ static SDValue lowerV16I16VectorShuffle(
 /// This routine is only called when we have AVX2 and thus a reasonable
 /// instruction set for v32i8 shuffling.
 static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
@@ -11146,7 +11146,7 @@ static SDValue lowerV32I8VectorShuffle(S
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
-  assert(Subtarget->hasAVX2() && "We can only lower v32i8 with AVX2!");
+  assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");
 
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
@@ -11215,7 +11215,7 @@ static SDValue lowerV32I8VectorShuffle(S
 /// shuffle or splits it into two 128-bit shuffles and fuses the results back
 /// together based on the available instructions.
 static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                        MVT VT, const X86Subtarget *Subtarget,
+                                        MVT VT, const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   SDLoc DL(Op);
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -11244,7 +11244,7 @@ static SDValue lower256BitVectorShuffle(
   // essentially *zero* ability to manipulate a 256-bit vector with integer
   // types. Since we'll use floating point types there eventually, just
   // immediately cast everything to a float and operate entirely in that domain.
-  if (VT.isInteger() && !Subtarget->hasAVX2()) {
+  if (VT.isInteger() && !Subtarget.hasAVX2()) {
     int ElementBits = VT.getScalarSizeInBits();
     if (ElementBits < 32)
       // No floating point type available, decompose into 128-bit vectors.
@@ -11329,7 +11329,7 @@ static SDValue lowerVectorShuffleWithPER
 
 /// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
 static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
@@ -11351,7 +11351,7 @@ static SDValue lowerV8F64VectorShuffle(S
 
 /// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
 static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                        const X86Subtarget *Subtarget,
+                                        const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
@@ -11369,7 +11369,7 @@ static SDValue lowerV16F32VectorShuffle(
 
 /// \brief Handle lowering of 8-lane 64-bit integer shuffles.
 static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
@@ -11391,7 +11391,7 @@ static SDValue lowerV8I64VectorShuffle(S
 
 /// \brief Handle lowering of 16-lane 32-bit integer shuffles.
 static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                        const X86Subtarget *Subtarget,
+                                        const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
@@ -11409,7 +11409,7 @@ static SDValue lowerV16I32VectorShuffle(
 
 /// \brief Handle lowering of 32-lane 16-bit integer shuffles.
 static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                        const X86Subtarget *Subtarget,
+                                        const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
@@ -11417,14 +11417,14 @@ static SDValue lowerV32I16VectorShuffle(
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
-  assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
+  assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
 
   return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
 }
 
 /// \brief Handle lowering of 64-lane 8-bit integer shuffles.
 static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
@@ -11432,7 +11432,7 @@ static SDValue lowerV64I8VectorShuffle(S
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
-  assert(Subtarget->hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
+  assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
 
   // FIXME: Implement direct support for this type!
   return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
@@ -11444,12 +11444,12 @@ static SDValue lowerV64I8VectorShuffle(S
 /// shuffle or splits it into two 256-bit shuffles and fuses the results back
 /// together based on the available instructions.
 static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                        MVT VT, const X86Subtarget *Subtarget,
+                                        MVT VT, const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   SDLoc DL(Op);
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
-  assert(Subtarget->hasAVX512() &&
+  assert(Subtarget.hasAVX512() &&
          "Cannot lower 512-bit vectors w/ basic ISA!");
 
   // Check for being able to broadcast a single element.
@@ -11471,11 +11471,11 @@ static SDValue lower512BitVectorShuffle(
   case MVT::v16i32:
     return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
   case MVT::v32i16:
-    if (Subtarget->hasBWI())
+    if (Subtarget.hasBWI())
       return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
     break;
   case MVT::v64i8:
-    if (Subtarget->hasBWI())
+    if (Subtarget.hasBWI())
       return lowerV64I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
     break;
 
@@ -11492,12 +11492,12 @@ static SDValue lower512BitVectorShuffle(
 // The only way to shuffle bits is to sign-extend the mask vector to a SIMD
 // vector, shuffle and then truncate it back.
 static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
-                                      MVT VT, const X86Subtarget *Subtarget,
+                                      MVT VT, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   SDLoc DL(Op);
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
-  assert(Subtarget->hasAVX512() &&
+  assert(Subtarget.hasAVX512() &&
          "Cannot lower 512-bit vectors w/o basic ISA!");
   MVT ExtVT;
   switch (VT.SimpleTy) {
@@ -11548,7 +11548,7 @@ static SDValue lower1BitVectorShuffle(SD
 /// above in helper routines. The canonicalization attempts to widen shuffles
 /// to involve fewer lanes of wider elements, consolidate symmetric patterns
 /// s.t. only one of the two inputs needs to be tested, etc.
-static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
@@ -11729,7 +11729,7 @@ static bool BUILD_VECTORtoBlendMask(Buil
 
 /// \brief Try to lower a VSELECT instruction to a vector shuffle.
 static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
-                                           const X86Subtarget *Subtarget,
+                                           const X86Subtarget &Subtarget,
                                            SelectionDAG &DAG) {
   SDValue Cond = Op.getOperand(0);
   SDValue LHS = Op.getOperand(1);
@@ -11767,7 +11767,7 @@ SDValue X86TargetLowering::LowerVSELECT(
     return BlendOp;
 
   // Variable blends are only legal from SSE4.1 onward.
-  if (!Subtarget->hasSSE41())
+  if (!Subtarget.hasSSE41())
     return SDValue();
 
   // Only some types will be legal on some subtargets. If we can emit a legal
@@ -11780,7 +11780,7 @@ SDValue X86TargetLowering::LowerVSELECT(
 
   case MVT::v32i8:
     // The byte blends for AVX vectors were introduced only in AVX2.
-    if (Subtarget->hasAVX2())
+    if (Subtarget.hasAVX2())
       return Op;
 
     return SDValue();
@@ -11788,7 +11788,7 @@ SDValue X86TargetLowering::LowerVSELECT(
   case MVT::v8i16:
   case MVT::v16i16:
     // AVX-512 BWI and VLX features support VSELECT with i16 elements.
-    if (Subtarget->hasBWI() && Subtarget->hasVLX())
+    if (Subtarget.hasBWI() && Subtarget.hasVLX())
       return Op;
 
     // FIXME: We should custom lower this by fixing the condition and using i8
@@ -11866,7 +11866,7 @@ X86TargetLowering::ExtractBitFromMaskVec
   MVT EltVT = Op.getSimpleValueType();
 
   assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
-  assert((VecVT.getVectorNumElements() <= 16 || Subtarget->hasBWI()) &&
+  assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
          "Unexpected vector type in ExtractBitFromMaskVector");
 
   // variable index can't be handled in mask registers,
@@ -11881,7 +11881,7 @@ X86TargetLowering::ExtractBitFromMaskVec
 
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
   const TargetRegisterClass* rc = getRegClassFor(VecVT);
-  if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8))
+  if (!Subtarget.hasDQI() && (VecVT.getVectorNumElements() <= 8))
     rc = getRegClassFor(MVT::v16i1);
   unsigned MaxSift = rc->getSize()*8 - 1;
   Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
@@ -11905,7 +11905,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_E
 
   if (!isa<ConstantSDNode>(Idx)) {
     if (VecVT.is512BitVector() ||
-        (VecVT.is256BitVector() && Subtarget->hasInt256() &&
+        (VecVT.is256BitVector() && Subtarget.hasInt256() &&
          VecVT.getVectorElementType().getSizeInBits() == 32)) {
 
       MVT MaskEltVT =
@@ -11946,7 +11946,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_E
 
   assert(VecVT.is128BitVector() && "Unexpected vector length");
 
-  if (Subtarget->hasSSE41())
+  if (Subtarget.hasSSE41())
     if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
       return Res;
 
@@ -12060,8 +12060,8 @@ SDValue X86TargetLowering::LowerINSERT_V
       // TODO: It is worthwhile to cast integer to floating point and back
       // and incur a domain crossing penalty if that's what we'll end up
       // doing anyway after extracting to a 128-bit vector.
-      if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
-          (Subtarget->hasAVX2() && EltVT == MVT::i32)) {
+      if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
+          (Subtarget.hasAVX2() && EltVT == MVT::i32)) {
         SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
         N2 = DAG.getIntPtrConstant(1, dl);
         return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2);
@@ -12085,7 +12085,7 @@ SDValue X86TargetLowering::LowerINSERT_V
   }
   assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
 
-  if (Subtarget->hasSSE41()) {
+  if (Subtarget.hasSSE41()) {
     if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) {
       unsigned Opc;
       if (VT == MVT::v8i16) {
@@ -12185,7 +12185,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDV
 // Lower a node with an EXTRACT_SUBVECTOR opcode.  This may result in
 // a simple subregister reference or explicit instructions to grab
 // upper bits of a vector.
-static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   SDLoc dl(Op);
   SDValue In =  Op.getOperand(0);
@@ -12194,7 +12194,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SD
   MVT ResVT   = Op.getSimpleValueType();
   MVT InVT    = In.getSimpleValueType();
 
-  if (Subtarget->hasFp256()) {
+  if (Subtarget.hasFp256()) {
     if (ResVT.is128BitVector() &&
         (InVT.is256BitVector() || InVT.is512BitVector()) &&
         isa<ConstantSDNode>(Idx)) {
@@ -12211,9 +12211,9 @@ static SDValue LowerEXTRACT_SUBVECTOR(SD
 // Lower a node with an INSERT_SUBVECTOR opcode.  This may result in a
 // simple superregister reference or explicit instructions to insert
 // the upper bits of a vector.
-static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
-  if (!Subtarget->hasAVX())
+  if (!Subtarget.hasAVX())
     return SDValue();
 
   SDLoc dl(Op);
@@ -12246,7 +12246,7 @@ static SDValue LowerINSERT_SUBVECTOR(SDV
         bool Fast;
         unsigned Alignment = FirstLd->getAlignment();
         unsigned AS = FirstLd->getAddressSpace();
-        const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+        const X86TargetLowering *TLI = Subtarget.getTargetLowering();
         if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                     OpVT, AS, Alignment, &Fast) && Fast) {
           SDValue Ops[] = { SubVec2, SubVec };
@@ -12286,12 +12286,12 @@ X86TargetLowering::LowerConstantPool(SDV
   unsigned WrapperKind = X86ISD::Wrapper;
   CodeModel::Model M = DAG.getTarget().getCodeModel();
 
-  if (Subtarget->isPICStyleRIPRel() &&
+  if (Subtarget.isPICStyleRIPRel() &&
       (M == CodeModel::Small || M == CodeModel::Kernel))
     WrapperKind = X86ISD::WrapperRIP;
-  else if (Subtarget->isPICStyleGOT())
+  else if (Subtarget.isPICStyleGOT())
     OpFlag = X86II::MO_GOTOFF;
-  else if (Subtarget->isPICStyleStubPIC())
+  else if (Subtarget.isPICStyleStubPIC())
     OpFlag = X86II::MO_PIC_BASE_OFFSET;
 
   auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -12318,12 +12318,12 @@ SDValue X86TargetLowering::LowerJumpTabl
   unsigned WrapperKind = X86ISD::Wrapper;
   CodeModel::Model M = DAG.getTarget().getCodeModel();
 
-  if (Subtarget->isPICStyleRIPRel() &&
+  if (Subtarget.isPICStyleRIPRel() &&
       (M == CodeModel::Small || M == CodeModel::Kernel))
     WrapperKind = X86ISD::WrapperRIP;
-  else if (Subtarget->isPICStyleGOT())
+  else if (Subtarget.isPICStyleGOT())
     OpFlag = X86II::MO_GOTOFF;
-  else if (Subtarget->isPICStyleStubPIC())
+  else if (Subtarget.isPICStyleStubPIC())
     OpFlag = X86II::MO_PIC_BASE_OFFSET;
 
   auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -12350,16 +12350,16 @@ X86TargetLowering::LowerExternalSymbol(S
   unsigned WrapperKind = X86ISD::Wrapper;
   CodeModel::Model M = DAG.getTarget().getCodeModel();
 
-  if (Subtarget->isPICStyleRIPRel() &&
+  if (Subtarget.isPICStyleRIPRel() &&
       (M == CodeModel::Small || M == CodeModel::Kernel)) {
-    if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF())
+    if (Subtarget.isTargetDarwin() || Subtarget.isTargetELF())
       OpFlag = X86II::MO_GOTPCREL;
     WrapperKind = X86ISD::WrapperRIP;
-  } else if (Subtarget->isPICStyleGOT()) {
+  } else if (Subtarget.isPICStyleGOT()) {
     OpFlag = X86II::MO_GOT;
-  } else if (Subtarget->isPICStyleStubPIC()) {
+  } else if (Subtarget.isPICStyleStubPIC()) {
     OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE;
-  } else if (Subtarget->isPICStyleStubNoDynamic()) {
+  } else if (Subtarget.isPICStyleStubNoDynamic()) {
     OpFlag = X86II::MO_DARWIN_NONLAZY;
   }
 
@@ -12371,7 +12371,7 @@ X86TargetLowering::LowerExternalSymbol(S
 
   // With PIC, the address is actually $g + Offset.
   if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->is64Bit()) {
+      !Subtarget.is64Bit()) {
     Result =
         DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
@@ -12391,7 +12391,7 @@ SDValue
 X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
   // Create the TargetBlockAddressAddress node.
   unsigned char OpFlags =
-    Subtarget->ClassifyBlockAddressReference();
+    Subtarget.ClassifyBlockAddressReference();
   CodeModel::Model M = DAG.getTarget().getCodeModel();
   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
@@ -12399,7 +12399,7 @@ X86TargetLowering::LowerBlockAddress(SDV
   auto PtrVT = getPointerTy(DAG.getDataLayout());
   SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
 
-  if (Subtarget->isPICStyleRIPRel() &&
+  if (Subtarget.isPICStyleRIPRel() &&
       (M == CodeModel::Small || M == CodeModel::Kernel))
     Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
   else
@@ -12420,7 +12420,7 @@ X86TargetLowering::LowerGlobalAddress(co
   // Create the TargetGlobalAddress node, folding in the constant
   // offset if it is legal.
   unsigned char OpFlags =
-      Subtarget->ClassifyGlobalReference(GV, DAG.getTarget());
+      Subtarget.ClassifyGlobalReference(GV, DAG.getTarget());
   CodeModel::Model M = DAG.getTarget().getCodeModel();
   auto PtrVT = getPointerTy(DAG.getDataLayout());
   SDValue Result;
@@ -12433,7 +12433,7 @@ X86TargetLowering::LowerGlobalAddress(co
     Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
   }
 
-  if (Subtarget->isPICStyleRIPRel() &&
+  if (Subtarget.isPICStyleRIPRel() &&
       (M == CodeModel::Small || M == CodeModel::Kernel))
     Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
   else
@@ -12627,35 +12627,35 @@ X86TargetLowering::LowerGlobalTLSAddress
   const GlobalValue *GV = GA->getGlobal();
   auto PtrVT = getPointerTy(DAG.getDataLayout());
 
-  if (Subtarget->isTargetELF()) {
+  if (Subtarget.isTargetELF()) {
     TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
     switch (model) {
       case TLSModel::GeneralDynamic:
-        if (Subtarget->is64Bit())
+        if (Subtarget.is64Bit())
           return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
         return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
       case TLSModel::LocalDynamic:
         return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT,
-                                           Subtarget->is64Bit());
+                                           Subtarget.is64Bit());
       case TLSModel::InitialExec:
       case TLSModel::LocalExec:
-        return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget->is64Bit(),
+        return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
                                    DAG.getTarget().getRelocationModel() ==
                                        Reloc::PIC_);
     }
     llvm_unreachable("Unknown TLS model.");
   }
 
-  if (Subtarget->isTargetDarwin()) {
+  if (Subtarget.isTargetDarwin()) {
     // Darwin only has one model of TLS.  Lower to that.
     unsigned char OpFlag = 0;
-    unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
+    unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ?
                            X86ISD::WrapperRIP : X86ISD::Wrapper;
 
     // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
     // global base reg.
     bool PIC32 = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) &&
-                 !Subtarget->is64Bit();
+                 !Subtarget.is64Bit();
     if (PIC32)
       OpFlag = X86II::MO_TLVP_PIC_BASE;
     else
@@ -12689,12 +12689,12 @@ X86TargetLowering::LowerGlobalTLSAddress
 
     // And our return value (tls address) is in the standard call return value
     // location.
-    unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+    unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
     return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
   }
 
-  if (Subtarget->isTargetKnownWindowsMSVC() ||
-      Subtarget->isTargetWindowsGNU()) {
+  if (Subtarget.isTargetKnownWindowsMSVC() ||
+      Subtarget.isTargetWindowsGNU()) {
     // Just use the implicit TLS architecture
     // Need to generate something similar to:
     //   mov     rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -12712,15 +12712,15 @@ X86TargetLowering::LowerGlobalTLSAddress
     // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
     // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
     // use its literal value of 0x2C.
-    Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
+    Value *Ptr = Constant::getNullValue(Subtarget.is64Bit()
                                         ? Type::getInt8PtrTy(*DAG.getContext(),
                                                              256)
                                         : Type::getInt32PtrTy(*DAG.getContext(),
                                                               257));
 
-    SDValue TlsArray = Subtarget->is64Bit()
+    SDValue TlsArray = Subtarget.is64Bit()
                            ? DAG.getIntPtrConstant(0x58, dl)
-                           : (Subtarget->isTargetWindowsGNU()
+                           : (Subtarget.isTargetWindowsGNU()
                                   ? DAG.getIntPtrConstant(0x2C, dl)
                                   : DAG.getExternalSymbol("_tls_array", PtrVT));
 
@@ -12734,7 +12734,7 @@ X86TargetLowering::LowerGlobalTLSAddress
     } else {
       // Load the _tls_index variable
       SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
-      if (Subtarget->is64Bit())
+      if (Subtarget.is64Bit())
         IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX,
                              MachinePointerInfo(), MVT::i32, false, false,
                              false, 0);
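
The two hunks above walk through the Windows implicit-TLS sequence: read ThreadLocalStoragePointer out of the TEB (%gs:0x58 on x86-64, %fs-relative __tls_array or the literal 0x2C on 32-bit/MinGW), index it with _tls_index, then add the variable's offset. The sketch below only models the final indexing arithmetic with fabricated data; it is plain portable C++, not the real TEB access and not the code the backend emits.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Toy model of: addr = ThreadLocalStoragePointer[_tls_index] + offset.
// Every value here is fabricated for illustration; the real lowering loads
// the array pointer from the TEB as described in the comments above.
int main() {
  uint8_t moduleTlsBlock[64] = {};  // this module's TLS block
  moduleTlsBlock[16] = 42;          // pretend our TLS variable lives at +16

  uint8_t *tlsArray[4] = {nullptr, moduleTlsBlock, nullptr, nullptr};
  std::size_t tlsIndex = 1;         // stand-in for the _tls_index load
  std::size_t varOffset = 16;       // stand-in for the variable's offset

  uint8_t *addr = tlsArray[tlsIndex] + varOffset;
  std::printf("%d\n", *addr);       // prints 42
}
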
@@ -12850,13 +12850,13 @@ SDValue X86TargetLowering::LowerSINT_TO_
   if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
     return Op;
   if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
-      Subtarget->is64Bit()) {
+      Subtarget.is64Bit()) {
     return Op;
   }
 
   SDValue ValueToStore = Op.getOperand(0);
   if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
-      !Subtarget->is64Bit())
+      !Subtarget.is64Bit())
     // Bitcasting to f64 here allows us to do a single 64-bit store from
     // an SSE register, avoiding the store forwarding penalty that would come
     // with two 32-bit stores.
@@ -12989,7 +12989,7 @@ SDValue X86TargetLowering::LowerUINT_TO_
   SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
   SDValue Result;
 
-  if (Subtarget->hasSSE3()) {
+  if (Subtarget.hasSSE3()) {
     // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
     Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
   } else {
@@ -13184,10 +13184,10 @@ SDValue X86TargetLowering::lowerUINT_TO_
   }
   case MVT::v4i32:
   case MVT::v8i32:
-    return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget);
+    return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget);
   case MVT::v16i8:
   case MVT::v16i16:
-    assert(Subtarget->hasAVX512());
+    assert(Subtarget.hasAVX512());
     return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
                        DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
   }
@@ -13211,8 +13211,8 @@ SDValue X86TargetLowering::LowerUINT_TO_
   MVT SrcVT = N0.getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
 
-  if (Subtarget->hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
-      (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget->is64Bit()))) {
+  if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
+      (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
     // Conversions from unsigned i32 to f32/f64 are legal,
     // using VCVTUSI2SS/SD.  Same for i64 in 64-bit mode.
     return Op;
@@ -13222,7 +13222,7 @@ SDValue X86TargetLowering::LowerUINT_TO_
     return LowerUINT_TO_FP_i64(Op, DAG);
   if (SrcVT == MVT::i32 && X86ScalarSSEf64)
     return LowerUINT_TO_FP_i32(Op, DAG);
-  if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
+  if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
     return SDValue();
 
   // Make a 64-bit buffer, and use it to build an FILD.
@@ -13242,7 +13242,7 @@ SDValue X86TargetLowering::LowerUINT_TO_
 
   assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
   SDValue ValueToStore = Op.getOperand(0);
-  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit())
+  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
     // Bitcasting to f64 here allows us to do a single 64-bit store from
     // an SSE register, avoiding the store forwarding penalty that would come
     // with two 32-bit stores.
@@ -13325,10 +13325,10 @@ X86TargetLowering::FP_TO_INTHelper(SDVal
   // used for the 32-bit subtarget, but also for f80 on a 64-bit target.
   bool UnsignedFixup = !IsSigned &&
                        DstTy == MVT::i64 &&
-                       (!Subtarget->is64Bit() ||
+                       (!Subtarget.is64Bit() ||
                         !isScalarFPTypeInSSEReg(TheVT));
 
-  if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
+  if (!IsSigned && DstTy != MVT::i64 && !Subtarget.hasAVX512()) {
     // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
     // The low 32 bits of the fist result will have the correct uint32 result.
     assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
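
The replacement described in this hunk leans on a standard identity: a non-negative value that fits in uint32 converts to the same bits whether you go float-to-uint32 directly or float-to-sint64 and keep the low 32 bits. A quick scalar check in plain C++ (the sample inputs are arbitrary):

#include <cstdint>
#include <cstdio>

// For inputs that fit in uint32, fp->sint64 followed by truncation to 32 bits
// gives the same answer as a direct fp->uint32 conversion, which is why the
// low half of the FIST result can be used.
int main() {
  const double inputs[] = {0.0, 1.5, 2147483648.0, 4294967040.0};
  for (double d : inputs) {
    uint32_t direct = static_cast<uint32_t>(d);
    uint32_t viaSigned64 = static_cast<uint32_t>(static_cast<int64_t>(d));
    std::printf("%.1f -> %u %u\n", d, unsigned(direct), unsigned(viaSigned64));
  }
}
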
@@ -13343,7 +13343,7 @@ X86TargetLowering::FP_TO_INTHelper(SDVal
   if (DstTy == MVT::i32 &&
       isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
     return std::make_pair(SDValue(), SDValue());
-  if (Subtarget->is64Bit() &&
+  if (Subtarget.is64Bit() &&
       DstTy == MVT::i64 &&
       isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
     return std::make_pair(SDValue(), SDValue());
@@ -13459,7 +13459,7 @@ X86TargetLowering::FP_TO_INTHelper(SDVal
                                  false, false, false, 0);
     High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
 
-    if (Subtarget->is64Bit()) {
+    if (Subtarget.is64Bit()) {
       // Join High32 and Low32 into a 64-bit result.
       // (High32 << 32) | Low32
       Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
@@ -13486,7 +13486,7 @@ X86TargetLowering::FP_TO_INTHelper(SDVal
 }
 
 static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
-                              const X86Subtarget *Subtarget) {
+                              const X86Subtarget &Subtarget) {
   MVT VT = Op->getSimpleValueType(0);
   SDValue In = Op->getOperand(0);
   MVT InVT = In.getSimpleValueType();
@@ -13513,7 +13513,7 @@ static SDValue LowerAVXExtend(SDValue Op
       ((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
     return SDValue();
 
-  if (Subtarget->hasInt256())
+  if (Subtarget.hasInt256())
     return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
 
   SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
@@ -13532,13 +13532,13 @@ static SDValue LowerAVXExtend(SDValue Op
 }
 
 static  SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
-                  const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+                  const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   MVT VT = Op->getSimpleValueType(0);
   SDValue In = Op->getOperand(0);
   MVT InVT = In.getSimpleValueType();
   SDLoc DL(Op);
   unsigned int NumElts = VT.getVectorNumElements();
-  if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
+  if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
     return SDValue();
 
   if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
@@ -13557,16 +13557,16 @@ static  SDValue LowerZERO_EXTEND_AVX512(
   return DAG.getNode(X86ISD::VTRUNC, DL, VT, V);
 }
 
-static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
-  if (Subtarget->hasFp256())
+  if (Subtarget.hasFp256())
     if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
       return Res;
 
   return SDValue();
 }
 
-static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
@@ -13576,7 +13576,7 @@ static SDValue LowerZERO_EXTEND(SDValue
   if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
     return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
 
-  if (Subtarget->hasFp256())
+  if (Subtarget.hasFp256())
     if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
       return Res;
 
@@ -13586,7 +13586,7 @@ static SDValue LowerZERO_EXTEND(SDValue
 }
 
 static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
 
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
@@ -13598,10 +13598,10 @@ static SDValue LowerTruncateVecI1(SDValu
   // Shift LSB to MSB and use VPMOVB2M - SKX.
   unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
   if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
-         Subtarget->hasBWI()) ||     // legal, will go to VPMOVB2M, VPMOVW2M
+         Subtarget.hasBWI()) ||     // legal, will go to VPMOVB2M, VPMOVW2M
       ((InVT.is256BitVector() || InVT.is128BitVector()) &&
-             InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() &&
-             Subtarget->hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M
+             InVT.getScalarSizeInBits() <= 16 && Subtarget.hasBWI() &&
+             Subtarget.hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M
     // Shift packed bytes not supported natively, bitcast to dword
     MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
     SDValue  ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
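
The "shift LSB to MSB" comments in this function refer to the fact that the VPMOV*2M family builds the mask from each lane's most-significant bit, so the interesting low bit has to be moved up first. A scalar model of one 16-bit lane in plain C++ (the sample lane values are arbitrary):

#include <cstdint>
#include <cstdio>

// The mask-producing instructions named in the comments (VPMOVW2M and
// friends) read each lane's sign bit, so truncation to i1 first shifts the
// low bit up into that position.
int main() {
  for (uint16_t lane : {uint16_t(0x0000), uint16_t(0x0001),
                        uint16_t(0xFFFE), uint16_t(0xABCD)}) {
    uint16_t shifted = uint16_t(lane << 15);   // move the LSB into the MSB
    bool maskBit = (shifted & 0x8000u) != 0;   // the bit the hardware keeps
    std::printf("lane %#06x -> mask bit %d\n", lane, maskBit);
  }
}
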
@@ -13611,10 +13611,10 @@ static SDValue LowerTruncateVecI1(SDValu
     return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
   }
   if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
-         Subtarget->hasDQI()) ||  // legal, will go to VPMOVD2M, VPMOVQ2M
+         Subtarget.hasDQI()) ||  // legal, will go to VPMOVD2M, VPMOVQ2M
       ((InVT.is256BitVector() || InVT.is128BitVector()) &&
-         InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() &&
-         Subtarget->hasVLX())) {  // legal, will go to VPMOVD2M, VPMOVQ2M
+         InVT.getScalarSizeInBits() >= 32 && Subtarget.hasDQI() &&
+         Subtarget.hasVLX())) {  // legal, will go to VPMOVD2M, VPMOVQ2M
 
     SDValue  ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
                                      DAG.getConstant(ShiftInx, DL, InVT));
@@ -13625,7 +13625,7 @@ static SDValue LowerTruncateVecI1(SDValu
   unsigned NumElts = InVT.getVectorNumElements();
   if (InVT.getSizeInBits() < 512 &&
       (InVT.getScalarType() == MVT::i8 || InVT.getScalarType() == MVT::i16 ||
-       !Subtarget->hasVLX())) {
+       !Subtarget.hasVLX())) {
     assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type.");
 
     // TESTD/Q should be used (if BW supported we use CVT2MASK above),
@@ -13662,16 +13662,16 @@ SDValue X86TargetLowering::LowerTRUNCATE
     return LowerTruncateVecI1(Op, DAG, Subtarget);
 
   // vpmovqb/w/d, vpmovdb/w, vpmovwb
-  if (Subtarget->hasAVX512()) {
+  if (Subtarget.hasAVX512()) {
     // word to byte only under BWI
-    if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8
+    if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8
       return DAG.getNode(X86ISD::VTRUNC, DL, VT,
                          DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
     return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
   }
   if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
     // On AVX2, v4i64 -> v4i32 becomes VPERMD.
-    if (Subtarget->hasInt256()) {
+    if (Subtarget.hasInt256()) {
       static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
       In = DAG.getBitcast(MVT::v8i32, In);
       In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
@@ -13692,7 +13692,7 @@ SDValue X86TargetLowering::LowerTRUNCATE
 
   if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
     // On AVX2, v8i32 -> v8i16 becomes PSHUFB.
-    if (Subtarget->hasInt256()) {
+    if (Subtarget.hasInt256()) {
       In = DAG.getBitcast(MVT::v32i8, In);
 
       SmallVector<SDValue,32> pshufbMask;
@@ -13750,7 +13750,7 @@ SDValue X86TargetLowering::LowerTRUNCATE
   if (!VT.is128BitVector() || !InVT.is256BitVector())
     return SDValue();
 
-  assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
+  assert(Subtarget.hasFp256() && "256-bit vector without AVX!");
 
   unsigned NumElems = VT.getVectorNumElements();
   MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2);
@@ -13998,11 +13998,11 @@ static SDValue LowerFGETSIGN(SDValue Op,
 }
 
 // Check whether an OR'd tree is PTEST-able.
-static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
 
-  if (!Subtarget->hasSSE41())
+  if (!Subtarget.hasSSE41())
     return SDValue();
 
   if (!Op->hasOneUse())
@@ -14210,14 +14210,14 @@ SDValue X86TargetLowering::EmitTest(SDVa
     if (ConstantSDNode *C =
         dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
       // An add of one will be selected as an INC.
-      if (C->isOne() && !Subtarget->slowIncDec()) {
+      if (C->isOne() && !Subtarget.slowIncDec()) {
         Opcode = X86ISD::INC;
         NumOperands = 1;
         break;
       }
 
       // An add of negative one (subtract of one) will be selected as a DEC.
-      if (C->isAllOnesValue() && !Subtarget->slowIncDec()) {
+      if (C->isAllOnesValue() && !Subtarget.slowIncDec()) {
         Opcode = X86ISD::DEC;
         NumOperands = 1;
         break;
@@ -14360,7 +14360,7 @@ SDValue X86TargetLowering::EmitCmp(SDVal
     // of memory operations.
     if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
         !DAG.getMachineFunction().getFunction()->optForMinSize() &&
-        !Subtarget->isAtom()) {
+        !Subtarget.isAtom()) {
       unsigned ExtendOp =
           isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
       Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
@@ -14380,7 +14380,7 @@ SDValue X86TargetLowering::ConvertCmpIfN
                                                  SelectionDAG &DAG) const {
   // If the subtarget does not support the FUCOMI instruction, floating-point
   // comparisons have to be converted.
-  if (Subtarget->hasCMov() ||
+  if (Subtarget.hasCMov() ||
       Cmp.getOpcode() != X86ISD::CMP ||
       !Cmp.getOperand(0).getValueType().isFloatingPoint() ||
       !Cmp.getOperand(1).getValueType().isFloatingPoint())
@@ -14398,7 +14398,7 @@ SDValue X86TargetLowering::ConvertCmpIfN
   SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
 
   // Some 64-bit targets lack SAHF support, but they do support FCOMI.
-  assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
+  assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
   return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
 }
 
@@ -14418,10 +14418,10 @@ SDValue X86TargetLowering::getRsqrtEstim
   // instructions: convert to single, rsqrtss, convert back to double, refine
   // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
   // along with FMA, this could be a throughput win.
-  if (VT == MVT::f32 && Subtarget->hasSSE1())
+  if (VT == MVT::f32 && Subtarget.hasSSE1())
     RecipOp = "sqrtf";
-  else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
-           (VT == MVT::v8f32 && Subtarget->hasAVX()))
+  else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+           (VT == MVT::v8f32 && Subtarget.hasAVX()))
     RecipOp = "vec-sqrtf";
   else
     return SDValue();
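
getRsqrtEstimate picks an estimate form and a refinement-step count; the refinement itself is the usual Newton-Raphson update mentioned in the comments above. A standalone sketch of one such step, assuming an x86 host with SSE so that _mm_rsqrt_ss is available; this illustrates the math, not the code the backend emits.

#include <cmath>
#include <cstdio>
#include <xmmintrin.h>

// One Newton-Raphson refinement of the ~12-bit RSQRTSS estimate:
//   x1 = x0 * (1.5 - 0.5 * a * x0 * x0)
static float rsqrtOneStep(float a) {
  float x0 = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(a)));  // hardware estimate
  return x0 * (1.5f - 0.5f * a * x0 * x0);                // refine once
}

int main() {
  float a = 2.0f;
  std::printf("refined: %.8f  libm: %.8f\n", rsqrtOneStep(a),
              1.0f / std::sqrt(a));
}
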
@@ -14450,10 +14450,10 @@ SDValue X86TargetLowering::getRecipEstim
   // 15 instructions: convert to single, rcpss, convert back to double, refine
   // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
   // along with FMA, this could be a throughput win.
-  if (VT == MVT::f32 && Subtarget->hasSSE1())
+  if (VT == MVT::f32 && Subtarget.hasSSE1())
     RecipOp = "divf";
-  else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
-           (VT == MVT::v8f32 && Subtarget->hasAVX()))
+  else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+           (VT == MVT::v8f32 && Subtarget.hasAVX()))
     RecipOp = "vec-divf";
   else
     return SDValue();
@@ -14665,7 +14665,7 @@ static SDValue LowerBoolVSETCC_AVX512(SD
 }
 
 static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
-                                     const X86Subtarget *Subtarget) {
+                                     const X86Subtarget &Subtarget) {
   SDValue Op0 = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
   SDValue CC = Op.getOperand(2);
@@ -14734,7 +14734,7 @@ static SDValue ChangeVSETULTtoVSETULE(SD
   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1);
 }
 
-static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
   SDValue Op0 = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
@@ -14752,7 +14752,7 @@ static SDValue LowerVSETCC(SDValue Op, c
 
     unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
     unsigned Opc = X86ISD::CMPP;
-    if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) {
+    if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
       assert(VT.getVectorNumElements() <= 16);
       Opc = X86ISD::CMPM;
     }
@@ -14804,11 +14804,11 @@ static SDValue LowerVSETCC(SDValue Op, c
 
   // The non-AVX512 code below works under the assumption that source and
   // destination types are the same.
-  assert((Subtarget->hasAVX512() || (VT == VTOp0)) &&
+  assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
          "Value types for source and destination must be the same!");
 
   // Break 256-bit integer vector compare into smaller ones.
-  if (VT.is256BitVector() && !Subtarget->hasInt256())
+  if (VT.is256BitVector() && !Subtarget.hasInt256())
     return Lower256IntVSETCC(Op, DAG);
 
   MVT OpVT = Op1.getSimpleValueType();
@@ -14816,9 +14816,9 @@ static SDValue LowerVSETCC(SDValue Op, c
     return LowerBoolVSETCC_AVX512(Op, DAG);
 
   bool MaskResult = (VT.getVectorElementType() == MVT::i1);
-  if (Subtarget->hasAVX512()) {
+  if (Subtarget.hasAVX512()) {
     if (Op1.getSimpleValueType().is512BitVector() ||
-        (Subtarget->hasBWI() && Subtarget->hasVLX()) ||
+        (Subtarget.hasBWI() && Subtarget.hasVLX()) ||
         (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
       return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
 
@@ -14835,7 +14835,7 @@ static SDValue LowerVSETCC(SDValue Op, c
 
   // Lower using XOP integer comparisons.
   if ((VT == MVT::v16i8 || VT == MVT::v8i16 ||
-       VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) {
+       VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) {
     // Translate compare code to XOP PCOM compare mode.
     unsigned CmpMode = 0;
     switch (SetCCOpcode) {
@@ -14887,8 +14887,8 @@ static SDValue LowerVSETCC(SDValue Op, c
   // Special case: Use min/max operations for SETULE/SETUGE
   MVT VET = VT.getVectorElementType();
   bool hasMinMax =
-       (Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
-    || (Subtarget->hasSSE2()  && (VET == MVT::i8));
+       (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
+    || (Subtarget.hasSSE2()  && (VET == MVT::i8));
 
   if (hasMinMax) {
     switch (SetCCOpcode) {
@@ -14900,7 +14900,7 @@ static SDValue LowerVSETCC(SDValue Op, c
     if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
   }
 
-  bool hasSubus = Subtarget->hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
+  bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
   if (!MinMax && hasSubus) {
     // As another special case, use PSUBUS[BW] when it's profitable. E.g. for
     // Op0 u<= Op1:
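
The equivalence the comment sketches is easy to check in scalar form: for unsigned values, a saturating subtraction a - b is zero exactly when a <= b, which is what lets PSUBUS plus a compare-with-zero stand in for an unsigned less-or-equal. An exhaustive plain C++ check for one uint8_t lane:

#include <cstdint>
#include <cstdio>

// PSUBUSB per-lane behaviour: unsigned subtract, clamped at zero.
static uint8_t saturatingSub(uint8_t a, uint8_t b) {
  return a > b ? uint8_t(a - b) : uint8_t(0);
}

int main() {
  int mismatches = 0;
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b)
      // (a -sat b) == 0  must agree with  a <= b  for every unsigned pair.
      mismatches += ((saturatingSub(uint8_t(a), uint8_t(b)) == 0) != (a <= b));
  std::printf("mismatches: %d\n", mismatches);   // prints 0
}
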
@@ -14914,7 +14914,7 @@ static SDValue LowerVSETCC(SDValue Op, c
       // beneficial because the constant in the register is no longer
       // overwritten as the destination, so it can be hoisted out of a loop.
       // Only do this pre-AVX since vpcmp* is no longer destructive.
-      if (Subtarget->hasAVX())
+      if (Subtarget.hasAVX())
         break;
       SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG);
       if (ULEOp1.getNode()) {
@@ -14940,8 +14940,8 @@ static SDValue LowerVSETCC(SDValue Op, c
   // Check that the operation in question is available (most are plain SSE2,
   // but PCMPGTQ and PCMPEQQ have different requirements).
   if (VT == MVT::v2i64) {
-    if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
-      assert(Subtarget->hasSSE2() && "Don't know how to lower!");
+    if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
+      assert(Subtarget.hasSSE2() && "Don't know how to lower!");
 
       // First cast everything to the right type.
       Op0 = DAG.getBitcast(MVT::v4i32, Op0);
@@ -14982,10 +14982,10 @@ static SDValue LowerVSETCC(SDValue Op, c
       return DAG.getBitcast(VT, Result);
     }
 
-    if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
+    if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) {
       // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
       // pcmpeqd + pshufd + pand.
-      assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
+      assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!");
 
       // First cast everything to the right type.
       Op0 = DAG.getBitcast(MVT::v4i32, Op0);
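
The synthesis in this hunk rests on a simple fact: two 64-bit lanes are equal exactly when both of their 32-bit halves are equal, so a 32-bit lane compare, a shuffle that swaps the halves of the result, and an AND reproduce PCMPEQQ. A scalar model of one lane (the sample constants are arbitrary):

#include <cstdint>
#include <cstdio>

// 64-bit equality from 32-bit equality of the low and high halves; the PAND
// in the lowering combines the two per-half compare results.
static bool eq64ViaEq32(uint64_t a, uint64_t b) {
  bool loEq = uint32_t(a) == uint32_t(b);
  bool hiEq = uint32_t(a >> 32) == uint32_t(b >> 32);
  return loEq && hiEq;
}

int main() {
  std::printf("%d %d\n",
              eq64ViaEq32(0x1234567800000000ULL, 0x1234567800000001ULL),
              eq64ViaEq32(0xDEADBEEFCAFEF00DULL, 0xDEADBEEFCAFEF00DULL));
  // prints: 0 1
}
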
@@ -15038,7 +15038,7 @@ SDValue X86TargetLowering::LowerSETCC(SD
 
   if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
 
-  assert(((!Subtarget->hasAVX512() && VT == MVT::i8) || (VT == MVT::i1))
+  assert(((!Subtarget.hasAVX512() && VT == MVT::i8) || (VT == MVT::i1))
          && "SetCC type must be 8-bit or 1-bit integer");
   SDValue Op0 = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
@@ -15168,15 +15168,15 @@ SDValue X86TargetLowering::LowerSELECT(S
   // are available or VBLENDV if AVX is available.
   // Otherwise FP cmovs get lowered into a less efficient branch sequence later.
   if (Cond.getOpcode() == ISD::SETCC &&
-      ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
-       (Subtarget->hasSSE1() && VT == MVT::f32)) &&
+      ((Subtarget.hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
+       (Subtarget.hasSSE1() && VT == MVT::f32)) &&
       VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
     SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
     int SSECC = translateX86FSETCC(
         cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
 
     if (SSECC != 8) {
-      if (Subtarget->hasAVX512()) {
+      if (Subtarget.hasAVX512()) {
         SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
                                   DAG.getConstant(SSECC, DL, MVT::i8));
         return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
@@ -15198,7 +15198,7 @@ SDValue X86TargetLowering::LowerSELECT(S
       // instructions as the AND/ANDN/OR sequence due to register moves, so
       // don't bother.
 
-      if (Subtarget->hasAVX() &&
+      if (Subtarget.hasAVX() &&
           !isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
 
         // Convert to vectors, do a VSELECT, and convert back to scalar.
@@ -15438,7 +15438,7 @@ SDValue X86TargetLowering::LowerSELECT(S
 }
 
 static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   MVT VT = Op->getSimpleValueType(0);
   SDValue In = Op->getOperand(0);
@@ -15449,22 +15449,22 @@ static SDValue LowerSIGN_EXTEND_AVX512(S
 
   // SKX processor
   if ((InVTElt == MVT::i1) &&
-      (((Subtarget->hasBWI() && Subtarget->hasVLX() &&
+      (((Subtarget.hasBWI() && Subtarget.hasVLX() &&
         VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) ||
 
-       ((Subtarget->hasBWI() && VT.is512BitVector() &&
+       ((Subtarget.hasBWI() && VT.is512BitVector() &&
         VTElt.getSizeInBits() <= 16)) ||
 
-       ((Subtarget->hasDQI() && Subtarget->hasVLX() &&
+       ((Subtarget.hasDQI() && Subtarget.hasVLX() &&
         VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
 
-       ((Subtarget->hasDQI() && VT.is512BitVector() &&
+       ((Subtarget.hasDQI() && VT.is512BitVector() &&
         VTElt.getSizeInBits() >= 32))))
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
 
   unsigned int NumElts = VT.getVectorNumElements();
 
-  if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
+  if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
     return SDValue();
 
   if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
@@ -15488,7 +15488,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(S
 }
 
 static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
-                                             const X86Subtarget *Subtarget,
+                                             const X86Subtarget &Subtarget,
                                              SelectionDAG &DAG) {
   SDValue In = Op->getOperand(0);
   MVT VT = Op->getSimpleValueType(0);
@@ -15506,7 +15506,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_I
   SDLoc dl(Op);
 
   // SSE41 targets can use the pmovsx* instructions directly.
-  if (Subtarget->hasSSE41())
+  if (Subtarget.hasSSE41())
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
 
   // pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
@@ -15543,7 +15543,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_I
   return SDValue();
 }
 
-static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
   MVT VT = Op->getSimpleValueType(0);
   SDValue In = Op->getOperand(0);
@@ -15558,7 +15558,7 @@ static SDValue LowerSIGN_EXTEND(SDValue
       (VT != MVT::v16i16 || InVT != MVT::v16i8))
     return SDValue();
 
-  if (Subtarget->hasInt256())
+  if (Subtarget.hasInt256())
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
 
   // Optimize vectors in AVX mode
@@ -15601,7 +15601,7 @@ static SDValue LowerSIGN_EXTEND(SDValue
 // FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
 // TODO: It is possible to support ZExt by zeroing the undef values during
 // the shuffle phase or after the shuffle.
-static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
   MVT RegVT = Op.getSimpleValueType();
   assert(RegVT.isVector() && "We only custom lower vector sext loads.");
@@ -15609,7 +15609,7 @@ static SDValue LowerExtendedLoad(SDValue
          "We only custom lower integer vector sext loads.");
 
   // Nothing useful we can do without SSE2 shuffles.
-  assert(Subtarget->hasSSE2() && "We only custom lower sext loads with SSE2.");
+  assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2.");
 
   LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
   SDLoc dl(Ld);
@@ -15628,7 +15628,7 @@ static SDValue LowerExtendedLoad(SDValue
   unsigned MemSz = MemVT.getSizeInBits();
   assert(RegSz > MemSz && "Register size must be greater than the mem size");
 
-  if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256()) {
+  if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) {
     // The only way in which we have a legal 256-bit vector result but not the
     // integer 256-bit operations needed to directly lower a sextload is if we
     // have AVX1 but not AVX2. In that case, we can always emit a sextload to
@@ -15751,7 +15751,7 @@ static SDValue LowerExtendedLoad(SDValue
 
   if (Ext == ISD::SEXTLOAD) {
     // If we have SSE4.1, we can directly emit a VSEXT node.
-    if (Subtarget->hasSSE41()) {
+    if (Subtarget.hasSSE41()) {
       SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
       return Sext;
@@ -16087,7 +16087,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
                                            SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   bool SplitStack = MF.shouldSplitStack();
-  bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) ||
+  bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
                SplitStack;
   SDLoc dl(Op);
 
@@ -16102,7 +16102,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
   // pointer when other instructions are using the stack.
   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
 
-  bool Is64Bit = Subtarget->is64Bit();
+  bool Is64Bit = Subtarget.is64Bit();
   MVT SPTy = getPointerTy(DAG.getDataLayout());
 
   SDValue Result;
@@ -16117,7 +16117,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
     SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
     Chain = SP.getValue(1);
     unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
-    const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+    const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
     unsigned StackAlign = TFI.getStackAlignment();
     Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
     if (Align > StackAlign)
@@ -16146,7 +16146,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
                                 DAG.getRegister(Vreg, SPTy));
   } else {
     SDValue Flag;
-    const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
+    const unsigned Reg = (Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX);
 
     Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
     Flag = Chain.getValue(1);
@@ -16154,7 +16154,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALL
 
     Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
 
-    const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+    const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     unsigned SPReg = RegInfo->getStackRegister();
     SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
     Chain = SP.getValue(1);
@@ -16183,8 +16183,8 @@ SDValue X86TargetLowering::LowerVASTART(
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   SDLoc DL(Op);
 
-  if (!Subtarget->is64Bit() ||
-      Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) {
+  if (!Subtarget.is64Bit() ||
+      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv())) {
     // vastart just stores the address of the VarArgsFrameIndex slot into the
     // memory location argument.
     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -16224,21 +16224,21 @@ SDValue X86TargetLowering::LowerVASTART(
 
   // Store ptr to reg_save_area.
   FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
-      Subtarget->isTarget64BitLP64() ? 8 : 4, DL));
+      Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
   SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
   Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo(
-      SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0);
+      SV, Subtarget.isTarget64BitLP64() ? 16 : 12), false, false, 0);
   MemOps.push_back(Store);
   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
 }
 
 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
-  assert(Subtarget->is64Bit() &&
+  assert(Subtarget.is64Bit() &&
          "LowerVAARG only handles 64-bit va_arg!");
   assert(Op.getNode()->getNumOperands() == 4);
 
   MachineFunction &MF = DAG.getMachineFunction();
-  if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
+  if (Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()))
     // The Win64 ABI uses char* instead of a structure.
     return DAG.expandVAArg(Op.getNode());
 
@@ -16268,9 +16268,9 @@ SDValue X86TargetLowering::LowerVAARG(SD
 
   if (ArgMode == 2) {
     // Sanity Check: Make sure using fp_offset makes sense.
-    assert(!Subtarget->useSoftFloat() &&
+    assert(!Subtarget.useSoftFloat() &&
            !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) &&
-           Subtarget->hasSSE1());
+           Subtarget.hasSSE1());
   }
 
   // Insert VAARG_64 node into the DAG
@@ -16296,12 +16296,12 @@ SDValue X86TargetLowering::LowerVAARG(SD
                      false, false, false, 0);
 }
 
-static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
   // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
   // where a va_list is still an i8*.
-  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
-  if (Subtarget->isCallingConvWin64(
+  assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
+  if (Subtarget.isCallingConvWin64(
         DAG.getMachineFunction().getFunction()->getCallingConv()))
     // Probably a Win64 va_copy.
     return DAG.expandVACopy(Op.getNode());
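
For reference, the SysV x86-64 va_list layout that this copy handles, and that the comment spells out as { i32, i32, i8*, i8* }, can be mirrored in plain C++ as below. The field names follow the SysV psABI, and the static_assert assumes a 64-bit build; on Win64 a va_list stays a plain char*, which is why LowerVACOPY defers to expandVACopy there.

#include <cstdint>

// Mirror of the SysV x86-64 va_list element.
struct SysVVaList {
  uint32_t gp_offset;        // offset into the GP register save area
  uint32_t fp_offset;        // offset into the FP register save area
  void *overflow_arg_area;   // next stack-passed argument
  void *reg_save_area;       // start of the register save area
};

static_assert(sizeof(SysVVaList) == 24,
              "{ i32, i32, i8*, i8* } occupies 24 bytes on a 64-bit target");

int main() { return 0; }
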
@@ -16424,7 +16424,7 @@ static SDValue getTargetVShiftNode(unsig
     // Let the shuffle legalizer expand this shift amount node.
     SDValue Op0 = ShAmt.getOperand(0);
     Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
-    ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, &Subtarget, DAG);
+    ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG);
   } else {
     // Need to build a vector containing shift amount.
     // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
@@ -16452,7 +16452,7 @@ static SDValue getTargetVShiftNode(unsig
 /// \brief Return Mask with the necessary casting or extending
 /// for \p Mask according to \p MaskVT when lowering masking intrinsics
 static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
-                           const X86Subtarget *Subtarget,
+                           const X86Subtarget &Subtarget,
                            SelectionDAG &DAG, SDLoc dl) {
 
   if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
@@ -16461,9 +16461,9 @@ static SDValue getMaskNode(SDValue Mask,
                        MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
   }
 
-  if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) {
+  if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
     if (MaskVT == MVT::v64i1) {
-      assert(Subtarget->hasBWI() && "Expected AVX512BW target!");
+      assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
       // In case 32bit mode, bitcast i64 is illegal, extend/split it.
       SDValue Lo, Hi;
       Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
@@ -16499,7 +16499,7 @@ static SDValue getMaskNode(SDValue Mask,
 /// necessary casting or extending for \p Mask when lowering masking intrinsics
 static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
                   SDValue PreservedSrc,
-                  const X86Subtarget *Subtarget,
+                  const X86Subtarget &Subtarget,
                   SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
@@ -16544,7 +16544,7 @@ static SDValue getVectorMaskingNode(SDVa
 /// for a scalar instruction.
 static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                                     SDValue PreservedSrc,
-                                    const X86Subtarget *Subtarget,
+                                    const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
   if (isAllOnesConstant(Mask))
     return Op;
@@ -16626,7 +16626,7 @@ static SDValue recoverFramePointer(Selec
   return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
 }
 
-static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc dl(Op);
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -17352,7 +17352,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
     // Returns one of the stack, base, or frame pointer registers, depending on
     // which is used to reference local variables.
     MachineFunction &MF = DAG.getMachineFunction();
-    const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+    const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     unsigned Reg;
     if (RegInfo->hasBasePointer(MF))
       Reg = RegInfo->getBaseRegister();
@@ -17366,7 +17366,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
 static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
                               SDValue Src, SDValue Mask, SDValue Base,
                               SDValue Index, SDValue ScaleOp, SDValue Chain,
-                              const X86Subtarget * Subtarget) {
+                              const X86Subtarget &Subtarget) {
   SDLoc dl(Op);
   auto *C = cast<ConstantSDNode>(ScaleOp);
   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
@@ -17452,7 +17452,7 @@ static SDValue getPrefetchNode(unsigned
 // getReadPerformanceCounter - Handles the lowering of builtin intrinsics that
 // read performance monitor counters (x86_rdpmc).
 static void getReadPerformanceCounter(SDNode *N, SDLoc DL,
-                              SelectionDAG &DAG, const X86Subtarget *Subtarget,
+                              SelectionDAG &DAG, const X86Subtarget &Subtarget,
                               SmallVectorImpl<SDValue> &Results) {
   assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -17466,7 +17466,7 @@ static void getReadPerformanceCounter(SD
 
   // Reads the content of a 64-bit performance counter and returns it in the
   // registers EDX:EAX.
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
     HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
                             LO.getValue(2));
@@ -17477,7 +17477,7 @@ static void getReadPerformanceCounter(SD
   }
   Chain = HI.getValue(1);
 
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     // The EAX register is loaded with the low-order 32 bits. The EDX register
     // is loaded with the supported high-order bits of the counter.
     SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
@@ -17498,7 +17498,7 @@ static void getReadPerformanceCounter(SD
 // read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is
 // also used to custom lower READCYCLECOUNTER nodes.
 static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
-                              SelectionDAG &DAG, const X86Subtarget *Subtarget,
+                              SelectionDAG &DAG, const X86Subtarget &Subtarget,
                               SmallVectorImpl<SDValue> &Results) {
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
@@ -17507,7 +17507,7 @@ static void getReadTimeStampCounter(SDNo
   // The processor's time-stamp counter (a 64-bit MSR) is stored into the
   // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
   // and the EAX register is loaded with the low-order 32 bits.
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
     HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
                             LO.getValue(2));
@@ -17531,7 +17531,7 @@ static void getReadTimeStampCounter(SDNo
                          MachinePointerInfo(), false, false, 0);
   }
 
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     // The EDX register is loaded with the high-order 32 bits of the MSR, and
     // the EAX register is loaded with the low-order 32 bits.
     SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
@@ -17548,7 +17548,7 @@ static void getReadTimeStampCounter(SDNo
   Results.push_back(Chain);
 }
 
-static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
   SmallVector<SDValue, 2> Results;
   SDLoc DL(Op);
@@ -17575,7 +17575,7 @@ static SDValue MarkEHRegistrationNode(SD
   return Chain;
 }
 
-static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 
@@ -17832,7 +17832,7 @@ SDValue X86TargetLowering::LowerRETURNAD
 
   if (Depth > 0) {
     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
-    const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+    const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT);
     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, PtrVT,
@@ -17850,7 +17850,7 @@ SDValue X86TargetLowering::LowerFRAMEADD
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   EVT VT = Op.getValueType();
 
   MFI->setFrameAddressIsTaken(true);
@@ -17889,7 +17889,7 @@ SDValue X86TargetLowering::LowerFRAMEADD
 // this table could be generated automatically from RegInfo.
 unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                               SelectionDAG &DAG) const {
-  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+  const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
   const MachineFunction &MF = DAG.getMachineFunction();
 
   unsigned Reg = StringSwitch<unsigned>(RegName)
@@ -17905,7 +17905,7 @@ unsigned X86TargetLowering::getRegisterB
                          " is allocatable: function has no frame pointer");
 #ifndef NDEBUG
     else {
-      const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+      const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
       unsigned FrameReg =
           RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
       assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
@@ -17922,23 +17922,23 @@ unsigned X86TargetLowering::getRegisterB
 
 SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
                                                      SelectionDAG &DAG) const {
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
 }
 
 unsigned X86TargetLowering::getExceptionPointerRegister(
     const Constant *PersonalityFn) const {
   if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR)
-    return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX;
+    return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
 
-  return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
+  return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX;
 }
 
 unsigned X86TargetLowering::getExceptionSelectorRegister(
     const Constant *PersonalityFn) const {
   // Funclet personalities don't use selectors (the runtime does the selection).
   assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)));
-  return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX;
+  return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
 }
 
 SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
@@ -17948,7 +17948,7 @@ SDValue X86TargetLowering::LowerEH_RETUR
   SDLoc dl      (Op);
 
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
   assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
           (FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
@@ -17996,9 +17996,9 @@ SDValue X86TargetLowering::LowerINIT_TRA
   SDLoc dl (Op);
 
   const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
 
-  if (Subtarget->is64Bit()) {
+  if (Subtarget.is64Bit()) {
     SDValue OutChains[6];
 
     // Large code-model.
@@ -18161,7 +18161,7 @@ SDValue X86TargetLowering::LowerFLT_ROUN
   */
 
   MachineFunction &MF = DAG.getMachineFunction();
-  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+  const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
   unsigned StackAlignment = TFI.getStackAlignment();
   MVT VT = Op.getSimpleValueType();
   SDLoc DL(Op);
@@ -18266,14 +18266,14 @@ static SDValue LowerVectorCTLZ_AVX512(SD
   return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
 }
 
-static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   MVT OpVT = VT;
   unsigned NumBits = VT.getSizeInBits();
   SDLoc dl(Op);
 
-  if (VT.isVector() && Subtarget->hasAVX512())
+  if (VT.isVector() && Subtarget.hasAVX512())
     return LowerVectorCTLZ_AVX512(Op, DAG);
 
   Op = Op.getOperand(0);
@@ -18305,7 +18305,7 @@ static SDValue LowerCTLZ(SDValue Op, con
   return Op;
 }
 
-static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   EVT OpVT = VT;
@@ -18434,7 +18434,7 @@ static SDValue LowerMINMAX(SDValue Op, S
   return Lower256IntArith(Op, DAG);
 }
 
-static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
   SDLoc dl(Op);
   MVT VT = Op.getSimpleValueType();
@@ -18443,7 +18443,7 @@ static SDValue LowerMUL(SDValue Op, cons
     return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
 
   // Decompose 256-bit ops into smaller 128-bit ops.
-  if (VT.is256BitVector() && !Subtarget->hasInt256())
+  if (VT.is256BitVector() && !Subtarget.hasInt256())
     return Lower256IntArith(Op, DAG);
 
   SDValue A = Op.getOperand(0);
@@ -18452,7 +18452,7 @@ static SDValue LowerMUL(SDValue Op, cons
   // Lower v16i8/v32i8 mul as promotion to v8i16/v16i16 vector
   // pairs, multiply and truncate.
   if (VT == MVT::v16i8 || VT == MVT::v32i8) {
-    if (Subtarget->hasInt256()) {
+    if (Subtarget.hasInt256()) {
       if (VT == MVT::v32i8) {
         MVT SubVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() / 2);
         SDValue Lo = DAG.getIntPtrConstant(0, dl);
@@ -18480,7 +18480,7 @@ static SDValue LowerMUL(SDValue Op, cons
 
     // Extract the lo parts and sign extend to i16
     SDValue ALo, BLo;
-    if (Subtarget->hasSSE41()) {
+    if (Subtarget.hasSSE41()) {
       ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A);
       BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B);
     } else {
@@ -18496,7 +18496,7 @@ static SDValue LowerMUL(SDValue Op, cons
 
     // Extract the hi parts and sign extend to i16
     SDValue AHi, BHi;
-    if (Subtarget->hasSSE41()) {
+    if (Subtarget.hasSSE41()) {
       const int ShufMask[] = {8,  9,  10, 11, 12, 13, 14, 15,
                               -1, -1, -1, -1, -1, -1, -1, -1};
       AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
@@ -18524,7 +18524,7 @@ static SDValue LowerMUL(SDValue Op, cons
 
   // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
   if (VT == MVT::v4i32) {
-    assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
+    assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
            "Should not custom lower when pmuldq is available!");
 
     // Extract the odd parts.
@@ -18589,7 +18589,7 @@ static SDValue LowerMUL(SDValue Op, cons
 }
 
 SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
-  assert(Subtarget->isTargetWin64() && "Unexpected target");
+  assert(Subtarget.isTargetWin64() && "Unexpected target");
   EVT VT = Op.getValueType();
   assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
          "Unexpected return type for lowering");
@@ -18640,14 +18640,14 @@ SDValue X86TargetLowering::LowerWin64_i1
   return DAG.getBitcast(VT, CallInfo.first);
 }
 
-static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG) {
   SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
   MVT VT = Op0.getSimpleValueType();
   SDLoc dl(Op);
 
-  assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
-         (VT == MVT::v8i32 && Subtarget->hasInt256()));
+  assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
+         (VT == MVT::v8i32 && Subtarget.hasInt256()));
 
   // PMULxD operations multiply each even value (starting at 0) of LHS with
   // the related value of RHS and produce a widen result.
@@ -18672,7 +18672,7 @@ static SDValue LowerMUL_LOHI(SDValue Op,
   MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
   bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
   unsigned Opcode =
-      (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
+      (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
   // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
   // => <2 x i64> <ae|cg>
   SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
@@ -18696,7 +18696,7 @@ static SDValue LowerMUL_LOHI(SDValue Op,
 
   // If we have a signed multiply but no PMULDQ fix up the high parts of a
   // unsigned multiply.
-  if (IsSigned && !Subtarget->hasSSE41()) {
+  if (IsSigned && !Subtarget.hasSSE41()) {
     SDValue ShAmt = DAG.getConstant(
         31, dl,
         DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
@@ -18717,19 +18717,19 @@ static SDValue LowerMUL_LOHI(SDValue Op,
 
 // Return true if the required (according to Opcode) shift-imm form is natively
 // supported by the Subtarget
-static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
+static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
                                         unsigned Opcode) {
   if (VT.getScalarSizeInBits() < 16)
     return false;
 
   if (VT.is512BitVector() &&
-      (VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI()))
+      (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
     return true;
 
   bool LShift = VT.is128BitVector() ||
-    (VT.is256BitVector() && Subtarget->hasInt256());
+    (VT.is256BitVector() && Subtarget.hasInt256());
 
-  bool AShift = LShift && (Subtarget->hasVLX() ||
+  bool AShift = LShift && (Subtarget.hasVLX() ||
     (VT != MVT::v2i64 && VT != MVT::v4i64));
   return (Opcode == ISD::SRA) ? AShift : LShift;
 }
@@ -18737,24 +18737,24 @@ static bool SupportedVectorShiftWithImm(
 // The shift amount is a variable, but it is the same for all vector lanes.
 // These instructions are defined together with shift-immediate.
 static
-bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
+bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
                                       unsigned Opcode) {
   return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
 }
 
 // Return true if the required (according to Opcode) variable-shift form is
 // natively supported by the Subtarget
-static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
+static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
                                     unsigned Opcode) {
 
-  if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16)
+  if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
     return false;
 
   // vXi16 supported only on AVX-512, BWI
-  if (VT.getScalarSizeInBits() == 16 && !Subtarget->hasBWI())
+  if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
     return false;
 
-  if (VT.is512BitVector() || Subtarget->hasVLX())
+  if (VT.is512BitVector() || Subtarget.hasVLX())
     return true;
 
   bool LShift = VT.is128BitVector() || VT.is256BitVector();
@@ -18763,7 +18763,7 @@ static bool SupportedVectorVarShift(MVT
 }
 
 static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
-                                         const X86Subtarget *Subtarget) {
+                                         const X86Subtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
   SDValue R = Op.getOperand(0);
@@ -18813,12 +18813,12 @@ static SDValue LowerScalarImmediateShift
         return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
 
       // i64 SRA needs to be performed as partial shifts.
-      if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
-          Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP())
+      if ((VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
+          Op.getOpcode() == ISD::SRA && !Subtarget.hasXOP())
         return ArithmeticShiftRight64(ShiftAmt);
 
       if (VT == MVT::v16i8 ||
-          (Subtarget->hasInt256() && VT == MVT::v32i8) ||
+          (Subtarget.hasInt256() && VT == MVT::v32i8) ||
           VT == MVT::v64i8) {
         unsigned NumElts = VT.getVectorNumElements();
         MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
@@ -18834,7 +18834,7 @@ static SDValue LowerScalarImmediateShift
         }
 
         // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
-        if (VT == MVT::v16i8 && Subtarget->hasXOP())
+        if (VT == MVT::v16i8 && Subtarget.hasXOP())
           return SDValue();
 
         if (Op.getOpcode() == ISD::SHL) {
@@ -18870,8 +18870,8 @@ static SDValue LowerScalarImmediateShift
   }
 
   // Special case in 32-bit mode, where i64 is expanded into high and low parts.
-  if (!Subtarget->is64Bit() && !Subtarget->hasXOP() &&
-      (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) {
+  if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
+      (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64))) {
 
     // Peek through any splat that was introduced for i64 shift vectorization.
     int SplatIndex = -1;
@@ -18928,7 +18928,7 @@ static SDValue LowerScalarImmediateShift
 }
 
 static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
-                                        const X86Subtarget* Subtarget) {
+                                        const X86Subtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
   SDValue R = Op.getOperand(0);
@@ -18989,7 +18989,7 @@ static SDValue LowerScalarVariableShift(
   }
 
   // Special case in 32-bit mode, where i64 is expanded into high and low parts.
-  if (!Subtarget->is64Bit() && VT == MVT::v2i64  &&
+  if (!Subtarget.is64Bit() && VT == MVT::v2i64  &&
       Amt.getOpcode() == ISD::BITCAST &&
       Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
     Amt = Amt.getOperand(0);
@@ -19010,7 +19010,7 @@ static SDValue LowerScalarVariableShift(
   return SDValue();
 }
 
-static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
+static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
@@ -19018,7 +19018,7 @@ static SDValue LowerShift(SDValue Op, co
   SDValue Amt = Op.getOperand(1);
 
   assert(VT.isVector() && "Custom lowering only for vector shifts!");
-  assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
+  assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
 
   if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
     return V;
@@ -19031,7 +19031,7 @@ static SDValue LowerShift(SDValue Op, co
 
   // XOP has 128-bit variable logical/arithmetic shifts.
   // +ve/-ve Amt = shift left/right.
-  if (Subtarget->hasXOP() &&
+  if (Subtarget.hasXOP() &&
       (VT == MVT::v2i64 || VT == MVT::v4i32 ||
        VT == MVT::v8i16 || VT == MVT::v16i8)) {
     if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
@@ -19058,7 +19058,7 @@ static SDValue LowerShift(SDValue Op, co
   // i64 vector arithmetic shift can be emulated with the transform:
   // M = lshr(SIGN_BIT, Amt)
   // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
-  if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget->hasInt256())) &&
+  if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
       Op.getOpcode() == ISD::SRA) {
     SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT);
     SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
@@ -19073,7 +19073,7 @@ static SDValue LowerShift(SDValue Op, co
   // Do this only if the vector shift count is a constant build_vector.
   if (Op.getOpcode() == ISD::SHL &&
       (VT == MVT::v8i16 || VT == MVT::v4i32 ||
-       (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
+       (Subtarget.hasInt256() && VT == MVT::v16i16)) &&
       ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
     SmallVector<SDValue, 8> Elts;
     MVT SVT = VT.getVectorElementType();
@@ -19233,14 +19233,14 @@ static SDValue LowerShift(SDValue Op, co
   }
 
   if (VT == MVT::v16i8 ||
-      (VT == MVT::v32i8 && Subtarget->hasInt256() && !Subtarget->hasXOP())) {
+      (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
     MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
     unsigned ShiftOpcode = Op->getOpcode();
 
     auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
       // On SSE41 targets we make use of the fact that VSELECT lowers
       // to PBLENDVB which selects bytes based just on the sign bit.
-      if (Subtarget->hasSSE41()) {
+      if (Subtarget.hasSSE41()) {
         V0 = DAG.getBitcast(VT, V0);
         V1 = DAG.getBitcast(VT, V1);
         Sel = DAG.getBitcast(VT, Sel);
@@ -19343,7 +19343,7 @@ static SDValue LowerShift(SDValue Op, co
   // It's worth extending once and using the v8i32 shifts for 16-bit types, but
   // the extra overheads to get from v16i8 to v8i32 make the existing SSE
   // solution better.
-  if (Subtarget->hasInt256() && VT == MVT::v8i16) {
+  if (Subtarget.hasInt256() && VT == MVT::v8i16) {
     MVT ExtVT = MVT::v8i32;
     unsigned ExtOpc =
         Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
@@ -19353,7 +19353,7 @@ static SDValue LowerShift(SDValue Op, co
                        DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
   }
 
-  if (Subtarget->hasInt256() && !Subtarget->hasXOP() && VT == MVT::v16i16) {
+  if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
     MVT ExtVT = MVT::v8i32;
     SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
     SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
@@ -19377,7 +19377,7 @@ static SDValue LowerShift(SDValue Op, co
     auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
       // On SSE41 targets we make use of the fact that VSELECT lowers
       // to PBLENDVB which selects bytes based just on the sign bit.
-      if (Subtarget->hasSSE41()) {
+      if (Subtarget.hasSSE41()) {
         MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
         V0 = DAG.getBitcast(ExtVT, V0);
         V1 = DAG.getBitcast(ExtVT, V1);
@@ -19394,7 +19394,7 @@ static SDValue LowerShift(SDValue Op, co
     };
 
     // Turn 'a' into a mask suitable for VSELECT: a = a << 12;
-    if (Subtarget->hasSSE41()) {
+    if (Subtarget.hasSSE41()) {
       // On SSE41 targets we need to replicate the shift mask in both
       // bytes for PBLENDVB.
       Amt = DAG.getNode(
@@ -19469,7 +19469,7 @@ static SDValue LowerShift(SDValue Op, co
   return SDValue();
 }
 
-static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   SDLoc DL(Op);
@@ -19477,7 +19477,7 @@ static SDValue LowerRotate(SDValue Op, c
   SDValue Amt = Op.getOperand(1);
 
   assert(VT.isVector() && "Custom lowering only for vector rotates!");
-  assert(Subtarget->hasXOP() && "XOP support required for vector rotates!");
+  assert(Subtarget.hasXOP() && "XOP support required for vector rotates!");
   assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported");
 
   // XOP has 128-bit vector variable + immediate rotates.
@@ -19589,9 +19589,9 @@ bool X86TargetLowering::needsCmpXchgNb(T
   unsigned OpWidth = MemType->getPrimitiveSizeInBits();
 
   if (OpWidth == 64)
-    return !Subtarget->is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
+    return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
   else if (OpWidth == 128)
-    return Subtarget->hasCmpxchg16b();
+    return Subtarget.hasCmpxchg16b();
   else
     return false;
 }
@@ -19611,7 +19611,7 @@ X86TargetLowering::shouldExpandAtomicLoa
 
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
-  unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
+  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
   Type *MemType = AI->getType();
 
   // If the operand is too big, we must see if cmpxchg8/16b is available
@@ -19648,7 +19648,7 @@ X86TargetLowering::shouldExpandAtomicRMW
   }
 }
 
-static bool hasMFENCE(const X86Subtarget& Subtarget) {
+static bool hasMFENCE(const X86Subtarget &Subtarget) {
   // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
   // no-sse2). There isn't any reason to disable it if the target processor
   // supports it.
@@ -19657,7 +19657,7 @@ static bool hasMFENCE(const X86Subtarget
 
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
-  unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
+  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
   Type *MemType = AI->getType();
   // Accesses larger than the native width are turned into cmpxchg/libcalls, so
   // there is no benefit in turning such RMWs into loads, and it is actually
@@ -19694,7 +19694,7 @@ X86TargetLowering::lowerIdempotentRMWInt
     // the IR level, so we must wrap it in an intrinsic.
     return nullptr;
 
-  if (!hasMFENCE(*Subtarget))
+  if (!hasMFENCE(Subtarget))
     // FIXME: it might make sense to use a locked operation here but on a
     // different cache-line to prevent cache-line bouncing. In practice it
     // is probably a small win, and x86 processors without mfence are rare
@@ -19714,7 +19714,7 @@ X86TargetLowering::lowerIdempotentRMWInt
   return Loaded;
 }
 
-static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
   SDLoc dl(Op);
   AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
@@ -19725,7 +19725,7 @@ static SDValue LowerATOMIC_FENCE(SDValue
   // The only fence that needs an instruction is a sequentially-consistent
   // cross-thread fence.
   if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
-    if (hasMFENCE(*Subtarget))
+    if (hasMFENCE(Subtarget))
       return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
 
     SDValue Chain = Op.getOperand(0);
@@ -19747,7 +19747,7 @@ static SDValue LowerATOMIC_FENCE(SDValue
   return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
 }
 
-static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG) {
   MVT T = Op.getSimpleValueType();
   SDLoc DL(Op);
@@ -19759,7 +19759,7 @@ static SDValue LowerCMP_SWAP(SDValue Op,
   case MVT::i16: Reg = X86::AX;  size = 2; break;
   case MVT::i32: Reg = X86::EAX; size = 4; break;
   case MVT::i64:
-    assert(Subtarget->is64Bit() && "Node not type legal!");
+    assert(Subtarget.is64Bit() && "Node not type legal!");
     Reg = X86::RAX; size = 8;
     break;
   }
@@ -19789,14 +19789,14 @@ static SDValue LowerCMP_SWAP(SDValue Op,
   return SDValue();
 }
 
-static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
   MVT SrcVT = Op.getOperand(0).getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
 
   if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
       SrcVT == MVT::i64) {
-    assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+    assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
     if (DstVT != MVT::f64)
       // This conversion needs to be expanded.
       return SDValue();
@@ -19816,7 +19816,7 @@ static SDValue LowerBITCAST(SDValue Op,
         Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
                                    DAG.getIntPtrConstant(i, dl)));
     } else {
-      assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() &&
+      assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
              "Unexpected source type in LowerBITCAST");
       Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
                                  DAG.getIntPtrConstant(0, dl)));
@@ -19835,8 +19835,8 @@ static SDValue LowerBITCAST(SDValue Op,
                        DAG.getIntPtrConstant(0, dl));
   }
 
-  assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
-         Subtarget->hasMMX() && "Unexpected custom BITCAST");
+  assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() &&
+         Subtarget.hasMMX() && "Unexpected custom BITCAST");
   assert((DstVT == MVT::i64 ||
           (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
          "Unexpected custom BITCAST");
@@ -19859,7 +19859,7 @@ static SDValue LowerBITCAST(SDValue Op,
 /// how many bytes of V are summed horizontally to produce each element of the
 /// result.
 static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
-                                      const X86Subtarget *Subtarget,
+                                      const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   SDLoc DL(V);
   MVT ByteVecVT = V.getSimpleValueType();
@@ -19924,7 +19924,7 @@ static SDValue LowerHorizontalByteSum(SD
 }
 
 static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL,
-                                        const X86Subtarget *Subtarget,
+                                        const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   MVT EltVT = VT.getVectorElementType();
@@ -19984,7 +19984,7 @@ static SDValue LowerVectorCTPOPInRegLUT(
 }
 
 static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL,
-                                       const X86Subtarget *Subtarget,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   assert(VT.is128BitVector() &&
@@ -20054,7 +20054,7 @@ static SDValue LowerVectorCTPOPBitmath(S
       DAG);
 }
 
-static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   // FIXME: Need to add AVX-512 support here!
@@ -20063,13 +20063,13 @@ static SDValue LowerVectorCTPOP(SDValue
   SDLoc DL(Op.getNode());
   SDValue Op0 = Op.getOperand(0);
 
-  if (!Subtarget->hasSSSE3()) {
+  if (!Subtarget.hasSSSE3()) {
     // We can't use the fast LUT approach, so fall back on vectorized bitmath.
     assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
     return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
   }
 
-  if (VT.is256BitVector() && !Subtarget->hasInt256()) {
+  if (VT.is256BitVector() && !Subtarget.hasInt256()) {
     unsigned NumElems = VT.getVectorNumElements();
 
     // Extract each 128-bit vector, compute pop count and concat the result.
@@ -20084,7 +20084,7 @@ static SDValue LowerVectorCTPOP(SDValue
   return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
 }
 
-static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
   assert(Op.getSimpleValueType().isVector() &&
          "We only do custom lowering for vector population count.");
@@ -20157,9 +20157,9 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(
                      Op.getOperand(1), Op.getOperand(2));
 }
 
-static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
-  assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
+  assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
 
   // For MacOSX, we want to call an alternative entry point: __sincos_stret,
   // which returns the values as { float, float } (in XMM0) or
@@ -20261,9 +20261,9 @@ static SDValue ExtendToType(SDValue InOp
                      InOp, DAG.getIntPtrConstant(0, dl));
 }
 
-static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG) {
-  assert(Subtarget->hasAVX512() &&
+  assert(Subtarget.hasAVX512() &&
          "MGATHER/MSCATTER are supported on AVX-512 arch only");
 
   // X86 scatter kills mask register, so its type should be added to
@@ -20312,7 +20312,7 @@ static SDValue LowerMSCATTER(SDValue Op,
   }
 
   unsigned NumElts = VT.getVectorNumElements();
-  if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+  if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
       !Index.getSimpleValueType().is512BitVector()) {
     // AVX512F supports only 512-bit vectors. Or data or index should
     // be 512 bit wide. If now the both index and data are 256-bit, but
@@ -20355,7 +20355,7 @@ static SDValue LowerMSCATTER(SDValue Op,
   return SDValue(NewScatter.getNode(), 0);
 }
 
-static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
 
   MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
@@ -20363,7 +20363,7 @@ static SDValue LowerMLOAD(SDValue Op, co
   SDValue Mask = N->getMask();
   SDLoc dl(Op);
 
-  if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+  if (Subtarget.hasAVX512() && !Subtarget.hasVLX() &&
       !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
     // This operation is legal for targets with VLX, but without
     // VLX the vector should be widened to 512 bit
@@ -20387,7 +20387,7 @@ static SDValue LowerMLOAD(SDValue Op, co
   return Op;
 }
 
-static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
   MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
   SDValue DataToStore = N->getValue();
@@ -20395,7 +20395,7 @@ static SDValue LowerMSTORE(SDValue Op, c
   SDValue Mask = N->getMask();
   SDLoc dl(Op);
 
-  if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+  if (Subtarget.hasAVX512() && !Subtarget.hasVLX() &&
       !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
     // This operation is legal for targets with VLX, but without
     // VLX the vector should be widened to 512 bit
@@ -20411,9 +20411,9 @@ static SDValue LowerMSTORE(SDValue Op, c
   return Op;
 }
 
-static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
-  assert(Subtarget->hasAVX512() &&
+  assert(Subtarget.hasAVX512() &&
          "MGATHER/MSCATTER are supported on AVX-512 arch only");
 
   MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
@@ -20428,7 +20428,7 @@ static SDValue LowerMGATHER(SDValue Op,
   unsigned NumElts = VT.getVectorNumElements();
   assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
 
-  if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+  if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
       !Index.getSimpleValueType().is512BitVector()) {
     // AVX512F supports only 512-bit vectors. Or data or index should
     // be 512 bit wide. If now the both index and data are 256-bit, but
@@ -20656,15 +20656,15 @@ void X86TargetLowering::ReplaceNodeResul
     llvm_unreachable("Do not know how to custom type legalize this operation!");
   case X86ISD::AVG: {
     // Legalize types for X86ISD::AVG by expanding vectors.
-    assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+    assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
 
     auto InVT = N->getValueType(0);
     auto InVTSize = InVT.getSizeInBits();
     const unsigned RegSize =
         (InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
-    assert((!Subtarget->hasAVX512() || RegSize < 512) &&
+    assert((!Subtarget.hasAVX512() || RegSize < 512) &&
            "512-bit vector requires AVX512");
-    assert((!Subtarget->hasAVX2() || RegSize < 256) &&
+    assert((!Subtarget.hasAVX2() || RegSize < 256) &&
            "256-bit vector requires AVX2");
 
     auto ElemVT = InVT.getVectorElementType();
@@ -20736,7 +20736,7 @@ void X86TargetLowering::ReplaceNodeResul
     return;
   }
   case ISD::UINT_TO_FP: {
-    assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+    assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
     if (N->getOperand(0).getValueType() != MVT::v2i32 ||
         N->getValueType(0) != MVT::v2f32)
       return;
@@ -20863,7 +20863,7 @@ void X86TargetLowering::ReplaceNodeResul
     break;
   }
   case ISD::BITCAST: {
-    assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+    assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
     EVT DstVT = N->getValueType(0);
     EVT SrcVT = N->getOperand(0)->getValueType(0);
 
@@ -21154,7 +21154,7 @@ bool X86TargetLowering::isLegalAddressin
 
   if (AM.BaseGV) {
     unsigned GVFlags =
-      Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
+      Subtarget.ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
 
     // If a reference to this global requires an extra load, we can't fold it.
     if (isGlobalStubReference(GVFlags))
@@ -21167,7 +21167,7 @@ bool X86TargetLowering::isLegalAddressin
 
     // If lower 4G is not available, then we must use rip-relative addressing.
     if ((M != CodeModel::Small || R != Reloc::Static) &&
-        Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+        Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1))
       return false;
   }
 
@@ -21204,7 +21204,7 @@ bool X86TargetLowering::isVectorShiftByS
 
   // On AVX2 there are new vpsllv[dq] instructions (and other shifts), that make
   // variable shifts just as cheap as scalar ones.
-  if (Subtarget->hasInt256() && (Bits == 32 || Bits == 64))
+  if (Subtarget.hasInt256() && (Bits == 32 || Bits == 64))
     return false;
 
   // Otherwise, it's significantly cheaper to shift by a scalar amount than by a
@@ -21253,12 +21253,12 @@ bool X86TargetLowering::isTruncateFree(E
 
 bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
   // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
-  return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
+  return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit();
 }
 
 bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
   // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
-  return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+  return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit();
 }
 
 bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
@@ -21289,7 +21289,7 @@ bool X86TargetLowering::isVectorLoadExtD
 
 bool
 X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
-  if (!Subtarget->hasAnyFMA())
+  if (!Subtarget.hasAnyFMA())
     return false;
 
   VT = VT.getScalarType();
@@ -21478,9 +21478,9 @@ static MachineBasicBlock *EmitPCMPSTRI(M
 }
 
 static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB,
-                                     const X86Subtarget *Subtarget) {
+                                     const X86Subtarget &Subtarget) {
   DebugLoc dl = MI->getDebugLoc();
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
 
   // insert input VAL into EAX
   BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
@@ -21501,9 +21501,9 @@ static MachineBasicBlock *EmitWRPKRU(Mac
 }
 
 static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB,
-                                     const X86Subtarget *Subtarget) {
+                                     const X86Subtarget &Subtarget) {
   DebugLoc dl = MI->getDebugLoc();
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
 
   // insert zero to ECX
   BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX)
@@ -21519,12 +21519,12 @@ static MachineBasicBlock *EmitRDPKRU(Mac
 }
 
 static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
-                                      const X86Subtarget *Subtarget) {
+                                      const X86Subtarget &Subtarget) {
   DebugLoc dl = MI->getDebugLoc();
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   // Address into RAX/EAX, other two args into ECX, EDX.
-  unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
-  unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+  unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
+  unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
   MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
   for (int i = 0; i < X86::AddrNumOperands; ++i)
     MIB.addOperand(MI->getOperand(i));
@@ -21575,7 +21575,7 @@ X86TargetLowering::EmitVAARG64WithCustom
   MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
 
   // Machine Information
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
   const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
   const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
@@ -21829,14 +21829,14 @@ X86TargetLowering::EmitVAStartSaveXMMReg
   XMMSaveMBB->addSuccessor(EndMBB);
 
   // Now add the instructions.
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
 
   unsigned CountReg = MI->getOperand(0).getReg();
   int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
   int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
 
-  if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) {
+  if (!Subtarget.isCallingConvWin64(F->getFunction()->getCallingConv())) {
     // If %al is 0, branch around the XMM save block.
     BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
     BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
@@ -21849,7 +21849,7 @@ X86TargetLowering::EmitVAStartSaveXMMReg
           !MI->getOperand(MI->getNumOperands() - 1).isReg() ||
           MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
          && "Expected last argument to be EFLAGS");
-  unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+  unsigned MOVOpc = Subtarget.hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
   // In the XMM save block, save all the XMM argument registers.
   for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
     int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
@@ -21944,7 +21944,7 @@ static bool isCMOVPseudo(MachineInstr *M
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
                                      MachineBasicBlock *BB) const {
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
 
   // To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -22112,7 +22112,7 @@ X86TargetLowering::EmitLoweredSelect(Mac
 
   // If the EFLAGS register isn't dead in the terminator, then claim that it's
   // live into the sink and copy blocks.
-  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
 
   MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
   if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
@@ -22234,7 +22234,7 @@ X86TargetLowering::EmitLoweredAtomicFP(M
   case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break;
   case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break;
   }
-  const X86InstrInfo *TII = Subtarget->getInstrInfo();
+  const X86InstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   MachineOperand MSrc = MI->getOperand(0);
@@ -22267,14 +22267,14 @@ MachineBasicBlock *
 X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
                                         MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
 
   assert(MF->shouldSplitStack());
 
-  const bool Is64Bit = Subtarget->is64Bit();
-  const bool IsLP64 = Subtarget->isTarget64BitLP64();
+  const bool Is64Bit = Subtarget.is64Bit();
+  const bool IsLP64 = Subtarget.isTarget64BitLP64();
 
   const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
   const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
@@ -22308,7 +22308,7 @@ X86TargetLowering::EmitLoweredSegAlloca(
     tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
     SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
     sizeVReg = MI->getOperand(1).getReg(),
-    physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
+    physSPReg = IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;
 
   MachineFunction::iterator MBBIter = ++BB->getIterator();
 
@@ -22340,7 +22340,7 @@ X86TargetLowering::EmitLoweredSegAlloca(
 
   // Calls into a routine in libgcc to allocate more space from the heap.
   const uint32_t *RegMask =
-      Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
+      Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
   if (IsLP64) {
     BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
       .addReg(sizeVReg);
@@ -22397,9 +22397,9 @@ X86TargetLowering::EmitLoweredSegAlloca(
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
                                         MachineBasicBlock *BB) const {
-  assert(!Subtarget->isTargetMachO());
+  assert(!Subtarget.isTargetMachO());
   DebugLoc DL = MI->getDebugLoc();
-  MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe(
+  MachineInstr *ResumeMI = Subtarget.getFrameLowering()->emitStackProbe(
       *BB->getParent(), *BB, MI, DL, false);
   MachineBasicBlock *ResumeBB = ResumeMI->getParent();
   MI->eraseFromParent(); // The pseudo instruction is gone now.
@@ -22410,7 +22410,7 @@ MachineBasicBlock *
 X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
                                        MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
-  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB();
   DebugLoc DL = MI->getDebugLoc();
 
@@ -22419,7 +22419,7 @@ X86TargetLowering::EmitLoweredCatchRet(M
          "SEH does not use catchret!");
 
   // Only 32-bit EH needs to worry about manually restoring stack pointers.
-  if (!Subtarget->is32Bit())
+  if (!Subtarget.is32Bit())
     return BB;
 
   // C++ EH creates a new target block to hold the restore code, and wires up
@@ -22445,8 +22445,8 @@ X86TargetLowering::EmitLoweredCatchPad(M
   const Constant *PerFn = MF->getFunction()->getPersonalityFn();
   bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
   // Only 32-bit SEH requires special handling for catchpad.
-  if (IsSEH && Subtarget->is32Bit()) {
-    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  if (IsSEH && Subtarget.is32Bit()) {
+    const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
     DebugLoc DL = MI->getDebugLoc();
     BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
   }
@@ -22462,20 +22462,20 @@ X86TargetLowering::EmitLoweredTLSCall(Ma
   // or EAX and doing an indirect call.  The return value will then
   // be in the normal return register.
   MachineFunction *F = BB->getParent();
-  const X86InstrInfo *TII = Subtarget->getInstrInfo();
+  const X86InstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
 
-  assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
+  assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
   assert(MI->getOperand(3).isGlobal() && "This should be a global");
 
   // Get a register mask for the lowered call.
   // FIXME: The 32-bit calls have non-standard calling conventions. Use a
   // proper register mask.
   const uint32_t *RegMask =
-      Subtarget->is64Bit() ?
-      Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() :
-      Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
-  if (Subtarget->is64Bit()) {
+      Subtarget.is64Bit() ?
+      Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
+      Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
+  if (Subtarget.is64Bit()) {
     MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
                                       TII->get(X86::MOV64rm), X86::RDI)
     .addReg(X86::RIP)
@@ -22519,7 +22519,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
                                     MachineBasicBlock *MBB) const {
   DebugLoc DL = MI->getDebugLoc();
   MachineFunction *MF = MBB->getParent();
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   MachineRegisterInfo &MRI = MF->getRegInfo();
 
   const BasicBlock *BB = MBB->getBasicBlock();
@@ -22591,7 +22591,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
     PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
     const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
     LabelReg = MRI.createVirtualRegister(PtrRC);
-    if (Subtarget->is64Bit()) {
+    if (Subtarget.is64Bit()) {
       MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
               .addReg(X86::RIP)
               .addImm(0)
@@ -22604,7 +22604,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
               .addReg(XII->getGlobalBaseReg(MF))
               .addImm(0)
               .addReg(0)
-              .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
+              .addMBB(restoreMBB, Subtarget.ClassifyBlockAddressReference())
               .addReg(0);
     }
   } else
@@ -22626,7 +22626,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
   MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
           .addMBB(restoreMBB);
 
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   MIB.addRegMask(RegInfo->getNoPreservedMask());
   thisMBB->addSuccessor(mainMBB);
   thisMBB->addSuccessor(restoreMBB);
@@ -22645,7 +22645,7 @@ X86TargetLowering::emitEHSjLjSetJmp(Mach
   // restoreMBB:
   if (RegInfo->hasBasePointer(*MF)) {
     const bool Uses64BitFramePtr =
-        Subtarget->isTarget64BitLP64() || Subtarget->isTargetNaCl64();
+        Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
     X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
     X86FI->setRestoreBasePointer(MF);
     unsigned FramePtr = RegInfo->getFrameRegister(*MF);
@@ -22668,7 +22668,7 @@ X86TargetLowering::emitEHSjLjLongJmp(Mac
                                      MachineBasicBlock *MBB) const {
   DebugLoc DL = MI->getDebugLoc();
   MachineFunction *MF = MBB->getParent();
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   MachineRegisterInfo &MRI = MF->getRegInfo();
 
   // Memory Reference
@@ -22683,7 +22683,7 @@ X86TargetLowering::emitEHSjLjLongJmp(Mac
     (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
   unsigned Tmp = MRI.createVirtualRegister(RC);
   // Since FP is only updated here but NOT referenced, it's treated as GPR.
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
   unsigned SP = RegInfo->getStackRegister();
 
@@ -22803,7 +22803,7 @@ X86TargetLowering::emitFMA3Instr(Machine
         default: llvm_unreachable("Unrecognized FMA variant.");
       }
 
-      const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+      const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
       MachineInstrBuilder MIB =
         BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc))
         .addOperand(MI->getOperand(0))
@@ -22873,7 +22873,7 @@ X86TargetLowering::EmitInstrWithCustomIn
   case X86::RDFLAGS32:
   case X86::RDFLAGS64: {
     DebugLoc DL = MI->getDebugLoc();
-    const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+    const TargetInstrInfo *TII = Subtarget.getInstrInfo();
     unsigned PushF =
         MI->getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
     unsigned Pop =
@@ -22888,7 +22888,7 @@ X86TargetLowering::EmitInstrWithCustomIn
   case X86::WRFLAGS32:
   case X86::WRFLAGS64: {
     DebugLoc DL = MI->getDebugLoc();
-    const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+    const TargetInstrInfo *TII = Subtarget.getInstrInfo();
     unsigned Push =
         MI->getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
     unsigned PopF =
@@ -22914,7 +22914,7 @@ X86TargetLowering::EmitInstrWithCustomIn
   case X86::FP80_TO_INT32_IN_MEM:
   case X86::FP80_TO_INT64_IN_MEM: {
     MachineFunction *F = BB->getParent();
-    const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+    const TargetInstrInfo *TII = Subtarget.getInstrInfo();
     DebugLoc DL = MI->getDebugLoc();
 
     // Change the floating point control register to use "round towards zero"
@@ -22996,9 +22996,9 @@ X86TargetLowering::EmitInstrWithCustomIn
   case X86::VPCMPESTRM128REG:
   case X86::PCMPESTRM128MEM:
   case X86::VPCMPESTRM128MEM:
-    assert(Subtarget->hasSSE42() &&
+    assert(Subtarget.hasSSE42() &&
            "Target must have SSE4.2 or AVX features enabled");
-    return EmitPCMPSTRM(MI, BB, Subtarget->getInstrInfo());
+    return EmitPCMPSTRM(MI, BB, Subtarget.getInstrInfo());
 
   // String/text processing lowering.
   case X86::PCMPISTRIREG:
@@ -23009,9 +23009,9 @@ X86TargetLowering::EmitInstrWithCustomIn
   case X86::VPCMPESTRIREG:
   case X86::PCMPESTRIMEM:
   case X86::VPCMPESTRIMEM:
-    assert(Subtarget->hasSSE42() &&
+    assert(Subtarget.hasSSE42() &&
            "Target must have SSE4.2 or AVX features enabled");
-    return EmitPCMPSTRI(MI, BB, Subtarget->getInstrInfo());
+    return EmitPCMPSTRI(MI, BB, Subtarget.getInstrInfo());
 
   // Thread synchronization.
   case X86::MONITOR:
@@ -23023,7 +23023,7 @@ X86TargetLowering::EmitInstrWithCustomIn
     return EmitRDPKRU(MI, BB, Subtarget);
   // xbegin
   case X86::XBEGIN:
-    return EmitXBegin(MI, BB, Subtarget->getInstrInfo());
+    return EmitXBegin(MI, BB, Subtarget.getInstrInfo());
 
   case X86::VASTART_SAVE_XMM_REGS:
     return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
@@ -23186,7 +23186,7 @@ bool X86TargetLowering::isGAPlusOffset(S
 /// FIXME: This could be expanded to support 512 bit vectors as well.
 static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
-                                        const X86Subtarget* Subtarget) {
+                                        const X86Subtarget &Subtarget) {
   SDLoc dl(N);
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
   SDValue V1 = SVOp->getOperand(0);
@@ -23272,7 +23272,7 @@ static SDValue PerformShuffleCombine256(
 static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
                                    int Depth, bool HasPSHUFB, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
-                                   const X86Subtarget *Subtarget) {
+                                   const X86Subtarget &Subtarget) {
   assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
 
   // Find the operand that enters the chain. Note that multiple uses are OK
@@ -23324,7 +23324,7 @@ static bool combineX86ShuffleChain(SDVal
       // Check if we have SSE3 which will let us use MOVDDUP. That instruction
       // is no slower than UNPCKLPD but has the option to fold the input operand
       // into even an unaligned memory load.
-      if (Lo && Subtarget->hasSSE3()) {
+      if (Lo && Subtarget.hasSSE3()) {
         Shuffle = X86ISD::MOVDDUP;
         ShuffleVT = MVT::v2f64;
       } else {
@@ -23346,7 +23346,7 @@ static bool combineX86ShuffleChain(SDVal
                     /*AddTo*/ true);
       return true;
     }
-    if (Subtarget->hasSSE3() &&
+    if (Subtarget.hasSSE3() &&
         (Mask.equals({0, 0, 2, 2}) || Mask.equals({1, 1, 3, 3}))) {
       bool Lo = Mask.equals({0, 0, 2, 2});
       unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
@@ -23419,7 +23419,7 @@ static bool combineX86ShuffleChain(SDVal
   // can replace them with a single PSHUFB instruction profitably. Intel's
   // manuals suggest only using PSHUFB if doing so replacing 5 instructions, but
   // in practice PSHUFB tends to be *very* fast so we're more aggressive.
-  if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
+  if ((Depth >= 3 || HasPSHUFB) && Subtarget.hasSSSE3()) {
     SmallVector<SDValue, 16> PSHUFBMask;
     int NumBytes = VT.getSizeInBits() / 8;
     int Ratio = NumBytes / Mask.size();
@@ -23484,7 +23484,7 @@ static bool combineX86ShufflesRecursivel
                                           int Depth, bool HasPSHUFB,
                                           SelectionDAG &DAG,
                                           TargetLowering::DAGCombinerInfo &DCI,
-                                          const X86Subtarget *Subtarget) {
+                                          const X86Subtarget &Subtarget) {
   // Bound the depth of our recursive combine because this is ultimately
   // quadratic in nature.
   if (Depth > 8)
@@ -23888,7 +23888,7 @@ static bool setTargetShuffleZeroElements
 /// \brief Try to combine x86 target specific shuffles.
 static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
                                            TargetLowering::DAGCombinerInfo &DCI,
-                                           const X86Subtarget *Subtarget) {
+                                           const X86Subtarget &Subtarget) {
   SDLoc DL(N);
   MVT VT = N.getSimpleValueType();
   SmallVector<int, 4> Mask;
@@ -24153,12 +24153,12 @@ static SDValue PerformTargetShuffleCombi
 /// the operands which explicitly discard the lanes which are unused by this
 /// operation to try to flow through the rest of the combiner the fact that
 /// they're unused.
-static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget,
+static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
-  if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
-      (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
+  if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
+      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
     return SDValue();
 
   // We only handle target-independent shuffles.
@@ -24207,7 +24207,7 @@ static SDValue combineShuffleToAddSub(SD
 /// PerformShuffleCombine - Performs several different shuffle combines.
 static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
-                                     const X86Subtarget *Subtarget) {
+                                     const X86Subtarget &Subtarget) {
   SDLoc dl(N);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -24225,7 +24225,7 @@ static SDValue PerformShuffleCombine(SDN
       return AddSub;
 
   // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
-  if (TLI.isTypeLegal(VT) && Subtarget->hasFp256() && VT.is256BitVector() &&
+  if (TLI.isTypeLegal(VT) && Subtarget.hasFp256() && VT.is256BitVector() &&
       N->getOpcode() == ISD::VECTOR_SHUFFLE)
     return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
 
@@ -24418,7 +24418,7 @@ static SDValue XFormVExtractWithShuffleI
 }
 
 static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
-                                     const X86Subtarget *Subtarget) {
+                                     const X86Subtarget &Subtarget) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
@@ -24447,8 +24447,8 @@ static SDValue PerformBITCASTCombine(SDN
     case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
     default: return SDValue();
   }
-  if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
-       (Subtarget->hasSSE2() && VT == MVT::f64)) &&
+  if (((Subtarget.hasSSE1() && VT == MVT::f32) ||
+       (Subtarget.hasSSE2() && VT == MVT::f64)) &&
       isa<ConstantSDNode>(N0.getOperand(1)) &&
       N0.getOperand(0).getOpcode() == ISD::BITCAST &&
       N0.getOperand(0).getOperand(0).getValueType() == VT) {
@@ -24613,7 +24613,7 @@ static SDValue PerformEXTRACT_VECTOR_ELT
 
 static SDValue
 transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget *Subtarget) {
+                                      const X86Subtarget &Subtarget) {
   SDLoc dl(N);
   SDValue Cond = N->getOperand(0);
   SDValue LHS = N->getOperand(1);
@@ -24659,7 +24659,7 @@ transformVSELECTtoBlendVECTOR_SHUFFLE(SD
 /// nodes.
 static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
-                                    const X86Subtarget *Subtarget) {
+                                    const X86Subtarget &Subtarget) {
   SDLoc DL(N);
   SDValue Cond = N->getOperand(0);
   // Get the LHS/RHS of the select.
@@ -24676,8 +24676,8 @@ static SDValue PerformSELECTCombine(SDNo
   if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
       VT != MVT::f80 && VT != MVT::f128 &&
       (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
-      (Subtarget->hasSSE2() ||
-       (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
+      (Subtarget.hasSSE2() ||
+       (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
 
     unsigned Opcode = 0;
@@ -24815,7 +24815,7 @@ static SDValue PerformSELECTCombine(SDNo
   }
 
   EVT CondVT = Cond.getValueType();
-  if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
+  if (Subtarget.hasAVX512() && VT.isVector() && CondVT.isVector() &&
       CondVT.getVectorElementType() == MVT::i1) {
     // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
     // lowering on KNL. In this case we convert it to
@@ -24826,7 +24826,7 @@ static SDValue PerformSELECTCombine(SDNo
     if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
         (OpVT.getVectorElementType() == MVT::i8 ||
          OpVT.getVectorElementType() == MVT::i16) &&
-        !(Subtarget->hasBWI() && Subtarget->hasVLX())) {
+        !(Subtarget.hasBWI() && Subtarget.hasVLX())) {
       Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
       DCI.AddToWorklist(Cond.getNode());
       return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
@@ -24964,8 +24964,8 @@ static SDValue PerformSELECTCombine(SDNo
   // Match VSELECTs into subs with unsigned saturation.
   if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
       // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
-      ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
-       (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+      ((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
+       (Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
 
     // Check if one of the arms of the VSELECT is a zero vector. If it's on the
@@ -25119,10 +25119,10 @@ static SDValue PerformSELECTCombine(SDNo
     if (VT.getVectorElementType() == MVT::i16)
       return SDValue();
     // Dynamic blending was only available from SSE4.1 onward.
-    if (VT.is128BitVector() && !Subtarget->hasSSE41())
+    if (VT.is128BitVector() && !Subtarget.hasSSE41())
       return SDValue();
     // Byte blends are only available in AVX2
-    if (VT == MVT::v32i8 && !Subtarget->hasAVX2())
+    if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
       return SDValue();
 
     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
@@ -25350,7 +25350,7 @@ static bool checkBoolTestAndOrSetCCCombi
 /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
 static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
   SDLoc DL(N);
 
   // If the flag operand isn't dead, don't touch this CMOV.
@@ -25763,11 +25763,11 @@ static SDValue PerformSRACombine(SDNode
 /// shift by a constant amount which is known to be bigger than or equal
 /// to the vector element size in bits.
 static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget *Subtarget) {
+                                      const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
 
   if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
-      (!Subtarget->hasInt256() ||
+      (!Subtarget.hasInt256() ||
        (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
     return SDValue();
 
@@ -25793,7 +25793,7 @@ static SDValue performShiftToAllZeros(SD
 /// PerformShiftCombine - Combine shifts.
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
-                                   const X86Subtarget *Subtarget) {
+                                   const X86Subtarget &Subtarget) {
   if (N->getOpcode() == ISD::SHL)
     if (SDValue V = PerformSHLCombine(N, DAG))
       return V;
@@ -25815,12 +25815,12 @@ static SDValue PerformShiftCombine(SDNod
 // and friends.  Likewise for OR -> CMPNEQSS.
 static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
-                            const X86Subtarget *Subtarget) {
+                            const X86Subtarget &Subtarget) {
   unsigned opcode;
 
   // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
   // we're requiring SSE2 for both.
-  if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+  if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
     SDValue N0 = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
     SDValue CMP0 = N0->getOperand(1);
@@ -25869,7 +25869,7 @@ static SDValue CMPEQCombine(SDNode *N, S
           // FIXME: need symbolic constants for these magic numbers.
           // See X86ATTInstPrinter.cpp:printSSECC().
           unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
-          if (Subtarget->hasAVX512()) {
+          if (Subtarget.hasAVX512()) {
             SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00,
                                          CMP01,
                                          DAG.getConstant(x86cc, DL, MVT::i8));
@@ -25886,7 +25886,7 @@ static SDValue CMPEQCombine(SDNode *N, S
           bool is64BitFP = (CMP00.getValueType() == MVT::f64);
           MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
 
-          if (is64BitFP && !Subtarget->is64Bit()) {
+          if (is64BitFP && !Subtarget.is64Bit()) {
             // On a 32-bit target, we cannot bitcast the 64-bit float to a
             // 64-bit integer, since that's not a legal type. Since
             // OnesOrZeroesF is all ones of all zeroes, we don't need all the
@@ -25949,7 +25949,7 @@ static bool CanFoldXORWithAllOnes(const
 // some of the transition sequences.
 static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   if (!VT.is256BitVector())
     return SDValue();
@@ -26028,7 +26028,7 @@ static SDValue WidenMaskArithmetic(SDNod
 
 static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDLoc DL(N);
@@ -26120,7 +26120,7 @@ static SDValue VectorZextCombine(SDNode
 /// types, try to convert this into a floating point logic node to avoid
 /// unnecessary moves from SSE to integer registers.
 static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
-                                        const X86Subtarget *Subtarget) {
+                                        const X86Subtarget &Subtarget) {
   unsigned FPOpcode = ISD::DELETED_NODE;
   if (N->getOpcode() == ISD::AND)
     FPOpcode = X86ISD::FAND;
@@ -26137,8 +26137,8 @@ static SDValue convertIntLogicToFPLogic(
   SDValue N1 = N->getOperand(1);
   SDLoc DL(N);
   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
-      ((Subtarget->hasSSE1() && VT == MVT::i32) ||
-       (Subtarget->hasSSE2() && VT == MVT::i64))) {
+      ((Subtarget.hasSSE1() && VT == MVT::i32) ||
+       (Subtarget.hasSSE2() && VT == MVT::i64))) {
     SDValue N00 = N0.getOperand(0);
     SDValue N10 = N1.getOperand(0);
     EVT N00Type = N00.getValueType();
@@ -26153,7 +26153,7 @@ static SDValue convertIntLogicToFPLogic(
 
 static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
@@ -26175,7 +26175,7 @@ static SDValue PerformAndCombine(SDNode
   // BEXTR is ((X >> imm) & (2**size-1))
   if (VT == MVT::i32 || VT == MVT::i64) {
     // Check for BEXTR.
-    if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
+    if ((Subtarget.hasBMI() || Subtarget.hasTBM()) &&
         (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
       ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
       ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -26219,7 +26219,7 @@ static SDValue PerformAndCombine(SDNode
 
 static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
-                                const X86Subtarget *Subtarget) {
+                                const X86Subtarget &Subtarget) {
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
@@ -26235,8 +26235,8 @@ static SDValue PerformOrCombine(SDNode *
 
   // look for psign/blend
   if (VT == MVT::v2i64 || VT == MVT::v4i64) {
-    if (!Subtarget->hasSSSE3() ||
-        (VT == MVT::v4i64 && !Subtarget->hasInt256()))
+    if (!Subtarget.hasSSSE3() ||
+        (VT == MVT::v4i64 && !Subtarget.hasInt256()))
       return SDValue();
 
     // Canonicalize pandn to RHS
@@ -26297,7 +26297,7 @@ static SDValue PerformOrCombine(SDNode *
         return DAG.getBitcast(VT, Mask);
       }
       // PBLENDVB only available on SSE 4.1
-      if (!Subtarget->hasSSE41())
+      if (!Subtarget.hasSSE41())
         return SDValue();
 
       MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
@@ -26321,7 +26321,7 @@ static SDValue PerformOrCombine(SDNode *
   // series of shifts/or that would otherwise be generated.
   // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions
   // have higher latencies and we are not optimizing for size.
-  if (!OptForSize && Subtarget->isSHLDSlow())
+  if (!OptForSize && Subtarget.isSHLDSlow())
     return SDValue();
 
   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
@@ -26460,14 +26460,14 @@ static SDValue foldXorTruncShiftIntoCmp(
 
 static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
   if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
     return RV;
 
-  if (Subtarget->hasCMov())
+  if (Subtarget.hasCMov())
     if (SDValue RV = performIntegerAbsCombine(N, DAG))
       return RV;
 
@@ -26481,7 +26481,7 @@ static SDValue PerformXorCombine(SDNode
 /// which is c = (a + b + 1) / 2, and replace this operation with the efficient
 /// X86ISD::AVG instruction.
 static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
-                                const X86Subtarget *Subtarget, SDLoc DL) {
+                                const X86Subtarget &Subtarget, SDLoc DL) {
   if (!VT.isVector() || !VT.isSimple())
     return SDValue();
   EVT InVT = In.getValueType();
@@ -26498,10 +26498,10 @@ static SDValue detectAVGPattern(SDValue
   if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
     return SDValue();
 
-  if (Subtarget->hasAVX512()) {
+  if (Subtarget.hasAVX512()) {
     if (VT.getSizeInBits() > 512)
       return SDValue();
-  } else if (Subtarget->hasAVX2()) {
+  } else if (Subtarget.hasAVX2()) {
     if (VT.getSizeInBits() > 256)
       return SDValue();
   } else {
@@ -26600,7 +26600,7 @@ static SDValue detectAVGPattern(SDValue
 /// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
 static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
   LoadSDNode *Ld = cast<LoadSDNode>(N);
   EVT RegVT = Ld->getValueType(0);
   EVT MemVT = Ld->getMemoryVT();
@@ -26652,7 +26652,7 @@ static SDValue PerformLOADCombine(SDNode
 /// PerformMLOADCombine - Resolve extending loads
 static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
-                                   const X86Subtarget *Subtarget) {
+                                   const X86Subtarget &Subtarget) {
   MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
   if (Mld->getExtensionType() != ISD::SEXTLOAD)
     return SDValue();
@@ -26731,7 +26731,7 @@ static SDValue PerformMLOADCombine(SDNod
 }
 /// PerformMSTORECombine - Resolve truncating stores
 static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
-                                    const X86Subtarget *Subtarget) {
+                                    const X86Subtarget &Subtarget) {
   MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
   if (!Mst->isTruncatingStore())
     return SDValue();
@@ -26820,7 +26820,7 @@ static SDValue PerformMSTORECombine(SDNo
 }
 /// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
 static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
-                                   const X86Subtarget *Subtarget) {
+                                   const X86Subtarget &Subtarget) {
   StoreSDNode *St = cast<StoreSDNode>(N);
   EVT VT = St->getValue().getValueType();
   EVT StVT = St->getMemoryVT();
@@ -26965,9 +26965,9 @@ static SDValue PerformSTORECombine(SDNod
   const Function *F = DAG.getMachineFunction().getFunction();
   bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat);
   bool F64IsLegal =
-      !Subtarget->useSoftFloat() && !NoImplicitFloatOps && Subtarget->hasSSE2();
+      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
   if ((VT.isVector() ||
-       (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
+       (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
       isa<LoadSDNode>(St->getValue()) &&
       !cast<LoadSDNode>(St->getValue())->isVolatile() &&
       St->getChain().hasOneUse() && !St->isVolatile()) {
@@ -27006,8 +27006,8 @@ static SDValue PerformSTORECombine(SDNod
     // If we are a 64-bit capable x86, lower to a single movq load/store pair.
     // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
     // pair instead.
-    if (Subtarget->is64Bit() || F64IsLegal) {
-      MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+    if (Subtarget.is64Bit() || F64IsLegal) {
+      MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
       SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
                                   Ld->getPointerInfo(), Ld->isVolatile(),
                                   Ld->isNonTemporal(), Ld->isInvariant(),
@@ -27067,7 +27067,7 @@ static SDValue PerformSTORECombine(SDNod
   // to get past legalization. The execution dependencies fixup pass will
   // choose the optimal machine instruction for the store if this really is
   // an integer or v2f32 rather than an f64.
-  if (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit() &&
+  if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
       St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
     SDValue OldExtract = St->getOperand(1);
     SDValue ExtOp0 = OldExtract.getOperand(0);
@@ -27212,14 +27212,14 @@ static bool isHorizontalBinOp(SDValue &L
 
 /// Do target-specific dag combines on floating point adds.
 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
   // Try to synthesize horizontal adds from adds of shuffles.
-  if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
-       (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+  if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+       (Subtarget.hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
       isHorizontalBinOp(LHS, RHS, true))
     return DAG.getNode(X86ISD::FHADD, SDLoc(N), VT, LHS, RHS);
   return SDValue();
@@ -27227,14 +27227,14 @@ static SDValue PerformFADDCombine(SDNode
 
 /// Do target-specific dag combines on floating point subs.
 static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
   // Try to synthesize horizontal subs from subs of shuffles.
-  if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
-       (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+  if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+       (Subtarget.hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
       isHorizontalBinOp(LHS, RHS, false))
     return DAG.getNode(X86ISD::FHSUB, SDLoc(N), VT, LHS, RHS);
   return SDValue();
@@ -27327,7 +27327,7 @@ combineVectorTruncationWithPACKSS(SDNode
 /// element that is extracted from a vector and then truncated, and it is
 /// diffcult to do this optimization based on them.
 static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
-                                       const X86Subtarget *Subtarget) {
+                                       const X86Subtarget &Subtarget) {
   EVT OutVT = N->getValueType(0);
   if (!OutVT.isVector())
     return SDValue();
@@ -27342,7 +27342,7 @@ static SDValue combineVectorTruncation(S
   // TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on
   // SSE2, and we need to take care of it specially.
   // AVX512 provides vpmovdb.
-  if (!Subtarget->hasSSE2() || Subtarget->hasAVX2())
+  if (!Subtarget.hasSSE2() || Subtarget.hasAVX2())
     return SDValue();
 
   EVT OutSVT = OutVT.getVectorElementType();
@@ -27353,7 +27353,7 @@ static SDValue combineVectorTruncation(S
     return SDValue();
 
   // SSSE3's pshufb results in less instructions in the cases below.
-  if (Subtarget->hasSSSE3() && NumElems == 8 &&
+  if (Subtarget.hasSSSE3() && NumElems == 8 &&
       ((OutSVT == MVT::i8 && InSVT != MVT::i64) ||
        (InSVT == MVT::i32 && OutSVT == MVT::i16)))
     return SDValue();
@@ -27373,7 +27373,7 @@ static SDValue combineVectorTruncation(S
   // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
   // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
   // truncate 2 x v4i32 to v8i16.
-  if (Subtarget->hasSSE41() || OutSVT == MVT::i8)
+  if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
     return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
   else if (InSVT == MVT::i32)
     return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
@@ -27382,7 +27382,7 @@ static SDValue combineVectorTruncation(S
 }
 
 static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget *Subtarget) {
+                                      const X86Subtarget &Subtarget) {
   // Try to detect AVG pattern first.
   SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG,
                                  Subtarget, SDLoc(N));
@@ -27394,7 +27394,7 @@ static SDValue PerformTRUNCATECombine(SD
 
 /// Do target-specific dag combines on floating point negations.
 static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   EVT SVT = VT.getScalarType();
   SDValue Arg = N->getOperand(0);
@@ -27408,7 +27408,7 @@ static SDValue PerformFNEGCombine(SDNode
   // use of a constant by performing (-0 - A*B) instead.
   // FIXME: Check rounding control flags as well once it becomes available.
   if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
-      Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {
+      Arg->getFlags()->hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
     SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
     return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
                        Arg.getOperand(1), Zero);
@@ -27436,9 +27436,9 @@ static SDValue PerformFNEGCombine(SDNode
 }
 
 static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
-                              const X86Subtarget *Subtarget) {
+                              const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
-  if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+  if (VT.is512BitVector() && !Subtarget.hasDQI()) {
     // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extention.
     // These logic operations may be executed in the integer domain.
     SDLoc dl(N);
@@ -27462,7 +27462,7 @@ static SDValue lowerX86FPLogicOp(SDNode
 }
 /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
 static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
 
   // F[X]OR(0.0, x) -> x
@@ -27500,8 +27500,8 @@ static SDValue PerformFMinFMaxCombine(SD
 }
 
 static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG,
-                                            const X86Subtarget *Subtarget) {
-  if (Subtarget->useSoftFloat())
+                                            const X86Subtarget &Subtarget) {
+  if (Subtarget.useSoftFloat())
     return SDValue();
 
   // TODO: Check for global or instruction-level "nnan". In that case, we
@@ -27510,9 +27510,9 @@ static SDValue performFMinNumFMaxNumComb
   //       should be an optional swap and FMAX/FMIN.
 
   EVT VT = N->getValueType(0);
-  if (!((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
-        (Subtarget->hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
-        (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
+  if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+        (Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
+        (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
     return SDValue();
 
   // This takes at least 3 instructions, so favor a library call when operating
@@ -27557,7 +27557,7 @@ static SDValue performFMinNumFMaxNumComb
 
 /// Do target-specific dag combines on X86ISD::FAND nodes.
 static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
   // FAND(0.0, x) -> 0.0
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
     if (C->getValueAPF().isPosZero())
@@ -27573,7 +27573,7 @@ static SDValue PerformFANDCombine(SDNode
 
 /// Do target-specific dag combines on X86ISD::FANDN nodes
 static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
-                                   const X86Subtarget *Subtarget) {
+                                   const X86Subtarget &Subtarget) {
   // FANDN(0.0, x) -> x
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
     if (C->getValueAPF().isPosZero())
@@ -27620,7 +27620,7 @@ static SDValue PerformVZEXT_MOVLCombine(
 }
 
 static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
-                                               const X86Subtarget *Subtarget) {
+                                               const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   if (!VT.isVector())
     return SDValue();
@@ -27641,7 +27641,7 @@ static SDValue PerformSIGN_EXTEND_INREGC
 
     // EXTLOAD has a better solution on AVX2,
     // it may be replaced with X86ISD::VSEXT node.
-    if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+    if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256())
       if (!ISD::isNormalLoad(N00.getNode()))
         return SDValue();
 
@@ -27659,7 +27659,7 @@ static SDValue PerformSIGN_EXTEND_INREGC
 /// to combine math ops, use an LEA, or use a complex addressing mode. This can
 /// eliminate extend, add, and shift instructions.
 static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
-                                       const X86Subtarget *Subtarget) {
+                                       const X86Subtarget &Subtarget) {
   // TODO: This should be valid for other integer types.
   EVT VT = Sext->getValueType(0);
   if (VT != MVT::i64)
@@ -27733,7 +27733,7 @@ static SDValue getDivRem8(SDNode *N, Sel
 
 static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
   EVT SVT = VT.getScalarType();
@@ -27754,7 +27754,7 @@ static SDValue PerformSExtCombine(SDNode
     return SDValue();
   }
 
-  if (VT.isVector() && Subtarget->hasSSE2()) {
+  if (VT.isVector() && Subtarget.hasSSE2()) {
     auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
       EVT InVT = N.getValueType();
       EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
@@ -27790,7 +27790,7 @@ static SDValue PerformSExtCombine(SDNode
 
     // On pre-AVX2 targets, split into 128-bit nodes of
     // ISD::SIGN_EXTEND_VECTOR_INREG.
-    if (!Subtarget->hasInt256() && !(VT.getSizeInBits() % 128) &&
+    if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128) &&
         (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
         (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
       unsigned NumVecs = VT.getSizeInBits() / 128;
@@ -27811,7 +27811,7 @@ static SDValue PerformSExtCombine(SDNode
     }
   }
 
-  if (Subtarget->hasAVX() && VT.is256BitVector())
+  if (Subtarget.hasAVX() && VT.is256BitVector())
     if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
       return R;
 
@@ -27822,7 +27822,7 @@ static SDValue PerformSExtCombine(SDNode
 }
 
 static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
-                                 const X86Subtarget* Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   SDLoc dl(N);
   EVT VT = N->getValueType(0);
 
@@ -27831,7 +27831,7 @@ static SDValue PerformFMACombine(SDNode
     return SDValue();
 
   EVT ScalarVT = VT.getScalarType();
-  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasAnyFMA())
+  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
     return SDValue();
 
   SDValue A = N->getOperand(0);
@@ -27862,7 +27862,7 @@ static SDValue PerformFMACombine(SDNode
 
 static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
-                                  const X86Subtarget *Subtarget) {
+                                  const X86Subtarget &Subtarget) {
   // (i32 zext (and (i8  x86isd::setcc_carry), 1)) ->
   //           (and (i32 x86isd::setcc_carry), 1)
   // This eliminates the zext. This transformation is necessary because
@@ -27910,7 +27910,7 @@ static SDValue PerformZExtCombine(SDNode
 // Optimize x == -y --> x+y == 0
 //          x != -y --> x+y != 0
 static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget* Subtarget) {
+                                      const X86Subtarget &Subtarget) {
   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
@@ -28003,7 +28003,7 @@ static SDValue MaterializeSETB(SDLoc DL,
 // Optimize  RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
 static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
-                                   const X86Subtarget *Subtarget) {
+                                   const X86Subtarget &Subtarget) {
   SDLoc DL(N);
   X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
   SDValue EFLAGS = N->getOperand(1);
@@ -28044,7 +28044,7 @@ static SDValue PerformSETCCCombine(SDNod
 //
 static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
-                                    const X86Subtarget *Subtarget) {
+                                    const X86Subtarget &Subtarget) {
   SDLoc DL(N);
   SDValue Chain = N->getOperand(0);
   SDValue Dest = N->getOperand(1);
@@ -28107,7 +28107,7 @@ static SDValue performVectorCompareAndMa
 }
 
 static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
-                                        const X86Subtarget *Subtarget) {
+                                        const X86Subtarget &Subtarget) {
   SDValue Op0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
   EVT InVT = Op0.getValueType();
@@ -28132,7 +28132,7 @@ static SDValue PerformUINT_TO_FPCombine(
 }
 
 static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
-                                        const X86Subtarget *Subtarget) {
+                                        const X86Subtarget &Subtarget) {
   // First try to optimize away the conversion entirely when it's
   // conditionally from a constant. Vectors only.
   if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
@@ -28156,7 +28156,7 @@ static SDValue PerformSINT_TO_FPCombine(
 
   // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
   // a 32-bit target where SSE doesn't support i64->FP operations.
-  if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
+  if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
     LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
     EVT LdVT = Ld->getValueType(0);
 
@@ -28166,8 +28166,8 @@ static SDValue PerformSINT_TO_FPCombine(
 
     if (!Ld->isVolatile() && !VT.isVector() &&
         ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
-        !Subtarget->is64Bit() && LdVT == MVT::i64) {
-      SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
+        !Subtarget.is64Bit() && LdVT == MVT::i64) {
+      SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD(
           SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
       DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
       return FILDChain;
@@ -28245,14 +28245,14 @@ static SDValue OptimizeConditionalInDecr
 
 /// PerformADDCombine - Do target-specific dag combines on integer adds.
 static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
 
   // Try to synthesize horizontal adds from adds of shuffles.
-  if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+  if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+       (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
       isHorizontalBinOp(Op0, Op1, true))
     return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
 
@@ -28260,7 +28260,7 @@ static SDValue PerformAddCombine(SDNode
 }
 
 static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
 
@@ -28284,8 +28284,8 @@ static SDValue PerformSubCombine(SDNode
 
   // Try to synthesize horizontal adds from adds of shuffles.
   EVT VT = N->getValueType(0);
-  if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+  if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+       (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
       isHorizontalBinOp(Op0, Op1, true))
     return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
 
@@ -28295,7 +28295,7 @@ static SDValue PerformSubCombine(SDNode
 /// performVZEXTCombine - Performs build vector combines
 static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
-                                   const X86Subtarget *Subtarget) {
+                                   const X86Subtarget &Subtarget) {
   SDLoc DL(N);
   MVT VT = N->getSimpleValueType(0);
   SDValue Op = N->getOperand(0);
@@ -28738,13 +28738,13 @@ TargetLowering::ConstraintWeight
       weight = CW_SpecificReg;
     break;
   case 'y':
-    if (type->isX86_MMXTy() && Subtarget->hasMMX())
+    if (type->isX86_MMXTy() && Subtarget.hasMMX())
       weight = CW_SpecificReg;
     break;
   case 'x':
   case 'Y':
-    if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
-        ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
+    if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
+        ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasFp256()))
       weight = CW_Register;
     break;
   case 'I':
@@ -28814,9 +28814,9 @@ LowerXConstraint(EVT ConstraintVT) const
   // FP X constraints get lowered to SSE1/2 registers if available, otherwise
   // 'f' like normal targets.
   if (ConstraintVT.isFloatingPoint()) {
-    if (Subtarget->hasSSE2())
+    if (Subtarget.hasSSE2())
       return "Y";
-    if (Subtarget->hasSSE1())
+    if (Subtarget.hasSSE1())
       return "x";
   }
 
@@ -28867,7 +28867,7 @@ void X86TargetLowering::LowerAsmOperandF
   case 'L':
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
       if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
-          (Subtarget->is64Bit() && C->getZExtValue() == 0xffffffff)) {
+          (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
         Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                        Op.getValueType());
         break;
@@ -28940,7 +28940,7 @@ void X86TargetLowering::LowerAsmOperandF
     // In any sort of PIC mode addresses need to be computed at runtime by
     // adding in a register or some sort of table lookup.  These can't
     // be used as immediates.
-    if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+    if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC())
       return;
 
     // If we are in non-pic codegen mode, we allow the address of a global (with
@@ -28975,7 +28975,7 @@ void X86TargetLowering::LowerAsmOperandF
     // If we require an extra load to get this address, as in PIC mode, we
     // can't accept it.
     if (isGlobalStubReference(
-            Subtarget->ClassifyGlobalReference(GV, DAG.getTarget())))
+            Subtarget.ClassifyGlobalReference(GV, DAG.getTarget())))
       return;
 
     Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
@@ -29005,7 +29005,7 @@ X86TargetLowering::getRegForInlineAsmCon
       // RIP in the class. Do they matter any more here than they do
       // in the normal allocation?
     case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
-      if (Subtarget->is64Bit()) {
+      if (Subtarget.is64Bit()) {
         if (VT == MVT::i32 || VT == MVT::f32)
           return std::make_pair(0U, &X86::GR32RegClass);
         if (VT == MVT::i16)
@@ -29033,7 +29033,7 @@ X86TargetLowering::getRegForInlineAsmCon
         return std::make_pair(0U, &X86::GR8RegClass);
       if (VT == MVT::i16)
         return std::make_pair(0U, &X86::GR16RegClass);
-      if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
+      if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit())
         return std::make_pair(0U, &X86::GR32RegClass);
       return std::make_pair(0U, &X86::GR64RegClass);
     case 'R':   // LEGACY_REGS
@@ -29041,7 +29041,7 @@ X86TargetLowering::getRegForInlineAsmCon
         return std::make_pair(0U, &X86::GR8_NOREXRegClass);
       if (VT == MVT::i16)
         return std::make_pair(0U, &X86::GR16_NOREXRegClass);
-      if (VT == MVT::i32 || !Subtarget->is64Bit())
+      if (VT == MVT::i32 || !Subtarget.is64Bit())
         return std::make_pair(0U, &X86::GR32_NOREXRegClass);
       return std::make_pair(0U, &X86::GR64_NOREXRegClass);
     case 'f':  // FP Stack registers.
@@ -29053,13 +29053,13 @@ X86TargetLowering::getRegForInlineAsmCon
         return std::make_pair(0U, &X86::RFP64RegClass);
       return std::make_pair(0U, &X86::RFP80RegClass);
     case 'y':   // MMX_REGS if MMX allowed.
-      if (!Subtarget->hasMMX()) break;
+      if (!Subtarget.hasMMX()) break;
       return std::make_pair(0U, &X86::VR64RegClass);
     case 'Y':   // SSE_REGS if SSE2 allowed
-      if (!Subtarget->hasSSE2()) break;
+      if (!Subtarget.hasSSE2()) break;
       // FALL THROUGH.
     case 'x':   // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
-      if (!Subtarget->hasSSE1()) break;
+      if (!Subtarget.hasSSE1()) break;
 
       switch (VT.SimpleTy) {
       default: break;
@@ -29242,7 +29242,7 @@ bool X86TargetLowering::isIntDivCheap(EV
 }
 
 void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
-  if (!Subtarget->is64Bit())
+  if (!Subtarget.is64Bit())
     return;
 
   // Update IsSplitCSR in X86MachineFunctionInfo.
@@ -29254,12 +29254,12 @@ void X86TargetLowering::initializeSplitC
 void X86TargetLowering::insertCopiesSplitCSR(
     MachineBasicBlock *Entry,
     const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
-  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
   if (!IStart)
     return;
 
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
   MachineBasicBlock::iterator MBBI = Entry->begin();
   for (const MCPhysReg *I = IStart; *I; ++I) {

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=258867&r1=258866&r2=258867&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Jan 26 16:08:58 2016
@@ -955,9 +955,9 @@ namespace llvm {
                             MVT VT) const override;
 
   private:
-    /// Keep a pointer to the X86Subtarget around so that we can
+    /// Keep a reference to the X86Subtarget around so that we can
     /// make the right decision when generating code for different targets.
-    const X86Subtarget *Subtarget;
+    const X86Subtarget &Subtarget;
 
     /// Select between SSE or x87 floating point ops.
     /// When SSE is available, use it for f32 operations.
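
The change above is mechanical: a member that was stored as a non-null
const X86Subtarget * becomes a const X86Subtarget & bound in the
constructor's initializer list, and every '->' access becomes '.'.
A minimal, self-contained C++ sketch of the same refactoring follows;
the class and member names are illustrative only, not code from LLVM.

  #include <iostream>

  struct Subtarget {
    bool hasSSE2() const { return true; }
  };

  class Lowering {
    // Was: const Subtarget *STI;  (initialized as STI(&S), used as STI->hasSSE2())
    // Now: bind the required, never-null argument as a reference.
    const Subtarget &STI;

  public:
    explicit Lowering(const Subtarget &S) : STI(S) {}
    bool useSSE2() const { return STI.hasSSE2(); }   // '.' instead of '->'
  };

  int main() {
    Subtarget S;
    Lowering L(S);  // reference bound once, for the object's lifetime
    std::cout << std::boolalpha << L.useSSE2() << '\n';  // prints: true
  }

One side effect of a reference member is that the class loses its
implicit copy assignment, which is harmless here since the lowering
object is constructed once per target machine and never reassigned.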



