[clang] [llvm] [mlir] [IRBuilder] Refactor for intrinsics const-folding (NFC) (PR #202738)

Ramkumar Ramachandra via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 11 06:00:36 PDT 2026


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/202738

>From aacc085d1205c82b5535bcc5f60e0677eb9492e4 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Tue, 9 Jun 2026 15:07:07 +0100
Subject: [PATCH 1/3] [IRBuilder] Refactor for intrinsics const-folding (NFC)

In preparation for const-folding intrinsic calls, refactor the IRBuilder
API, generalizing it to return possibly constant-folded values.
---
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          |   2 +-
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   |   5 +-
 clang/lib/CodeGen/TargetBuiltins/SPIR.cpp     |   2 +-
 llvm/include/llvm/IR/IRBuilder.h              | 150 ++++++-----
 llvm/lib/CodeGen/InlineAsmPrepare.cpp         |   2 +-
 llvm/lib/CodeGen/SafeStack.cpp                |   2 +-
 llvm/lib/IR/IRBuilder.cpp                     | 232 ++++++++++--------
 .../Target/AArch64/AArch64ISelLowering.cpp    |   2 +-
 .../Target/AArch64/AArch64StackTagging.cpp    |  14 +-
 .../AArch64/AArch64TargetTransformInfo.cpp    |   2 +-
 llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp  |   2 +-
 .../AMDGPU/AMDGPUAsanInstrumentation.cpp      |   2 +-
 .../Target/AMDGPU/AMDGPUAtomicOptimizer.cpp   |   4 +-
 .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp    |   3 +-
 .../AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp  |   3 +-
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     |  13 +-
 .../AMDGPU/AMDGPULowerBufferFatPointers.cpp   |  12 +-
 .../Target/AMDGPU/AMDGPULowerIntrinsics.cpp   |  28 ++-
 .../AMDGPU/AMDGPULowerKernelArguments.cpp     |   6 +-
 .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp |  12 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   8 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  15 +-
 .../Target/ARM/MVEGatherScatterLowering.cpp   |  81 +++---
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  18 +-
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    |   2 +-
 .../Target/Hexagon/HexagonVectorCombine.cpp   |   2 +-
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp      |   2 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   8 +-
 .../Target/RISCV/RISCVInterleavedAccess.cpp   |   2 +-
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp |  89 +++----
 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp |   2 +-
 .../Target/SPIRV/SPIRVLegalizePointerCast.cpp |   6 +-
 .../Target/SPIRV/SPIRVPrepareFunctions.cpp    |   9 +-
 llvm/lib/Target/SPIRV/SPIRVUtils.cpp          |   2 +-
 llvm/lib/Target/SystemZ/SystemZTDC.cpp        |   2 +-
 llvm/lib/Target/X86/X86LowerAMXType.cpp       |  11 +-
 llvm/lib/Transforms/IPO/FatLTOCleanup.cpp     |   2 +-
 .../InstCombine/InstCombineAndOrXor.cpp       |   2 +-
 .../InstCombine/InstCombineCalls.cpp          |   5 +-
 .../InstCombine/InstCombineCompares.cpp       |   4 +-
 .../InstCombine/InstCombineMulDivRem.cpp      |  10 +-
 .../InstCombine/InstCombineSelect.cpp         |   6 +-
 .../Instrumentation/BoundsChecking.cpp        |   6 +-
 .../Instrumentation/MemorySanitizer.cpp       |  31 ++-
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  |  29 +--
 .../Utils/ScalarEvolutionExpander.cpp         |   6 +-
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp |   2 +-
 .../Transforms/Vectorize/SLPVectorizer.cpp    |   8 +-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |  40 +--
 .../Transforms/Vectorize/VectorCombine.cpp    |  24 +-
 llvm/unittests/IR/IRBuilderTest.cpp           |  38 +--
 llvm/unittests/IR/IntrinsicsTest.cpp          |  11 +-
 llvm/unittests/Transforms/Utils/LocalTest.cpp |   2 +-
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |   4 +-
 54 files changed, 531 insertions(+), 456 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 920816bd3d3f8..6c6bd774abeda 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -124,7 +124,7 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
 
     auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
 
-    CallInst *CI = CGF->Builder.CreateIntrinsic(
+    Value *CI = CGF->Builder.CreateIntrinsic(
         RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
 
     LowBits = CGF->Builder.CreateExtractValue(CI, 0);
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 21f32b12c4fd1..b2e9325015b51 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -341,7 +341,8 @@ emitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
   }
 
   llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
-  llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
+  llvm::CallInst *Call =
+      CGF.Builder.CreateIntrinsicWithoutFolding(RetTy, IntrinsicID, Args);
   return Call;
 }
 
@@ -463,7 +464,7 @@ static Value *GetAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
   MDNode *Predicate = MDNode::get(Ctx, MDString::get(Ctx, Name.str()));
   std::vector<Value *> Args = {SpecId, ConstantInt::getFalse(Ctx),
                                MetadataAsValue::get(Ctx, Predicate)};
-  CallInst *Call = CGF.Builder.CreateIntrinsic(
+  Value *Call = CGF.Builder.CreateIntrinsic(
       Intrinsic::spv_named_boolean_spec_constant, Args);
 
   return Call;
diff --git a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp
index ff7b5fefedd19..b2732e2ae674e 100644
--- a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp
@@ -103,7 +103,7 @@ Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
     llvm::Type *Res = getTypes().ConvertType(E->getType());
     assert(Res->isPointerTy() &&
            "GenericCastToPtrExplicit doesn't return a pointer");
-    llvm::CallInst *Call = Builder.CreateIntrinsic(
+    llvm::CallInst *Call = Builder.CreateIntrinsicWithoutFolding(
         /*ReturnType=*/Res, Intrinsic::spv_generic_cast_to_ptr_explicit,
         ArrayRef<Value *>{Ptr}, nullptr, "spv.generic_cast");
     Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 99e4b748425c0..d0e80c983a871 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -785,61 +785,61 @@ class IRBuilderBase {
       uint32_t ElementSize, const AAMDNodes &AAInfo = AAMDNodes());
 
 private:
-  CallInst *getReductionIntrinsic(Intrinsic::ID ID, Value *Src);
+  Value *getReductionIntrinsic(Intrinsic::ID ID, Value *Src);
 
 public:
   /// Create a sequential vector fadd reduction intrinsic of the source vector.
   /// The first parameter is a scalar accumulator value. An unordered reduction
   /// can be created by adding the reassoc fast-math flag to the resulting
   /// sequential reduction.
-  LLVM_ABI CallInst *CreateFAddReduce(Value *Acc, Value *Src);
+  LLVM_ABI Value *CreateFAddReduce(Value *Acc, Value *Src);
 
   /// Create a sequential vector fmul reduction intrinsic of the source vector.
   /// The first parameter is a scalar accumulator value. An unordered reduction
   /// can be created by adding the reassoc fast-math flag to the resulting
   /// sequential reduction.
-  LLVM_ABI CallInst *CreateFMulReduce(Value *Acc, Value *Src);
+  LLVM_ABI Value *CreateFMulReduce(Value *Acc, Value *Src);
 
   /// Create a vector int add reduction intrinsic of the source vector.
-  LLVM_ABI CallInst *CreateAddReduce(Value *Src);
+  LLVM_ABI Value *CreateAddReduce(Value *Src);
 
   /// Create a vector int mul reduction intrinsic of the source vector.
-  LLVM_ABI CallInst *CreateMulReduce(Value *Src);
+  LLVM_ABI Value *CreateMulReduce(Value *Src);
 
   /// Create a vector int AND reduction intrinsic of the source vector.
-  LLVM_ABI CallInst *CreateAndReduce(Value *Src);
+  LLVM_ABI Value *CreateAndReduce(Value *Src);
 
   /// Create a vector int OR reduction intrinsic of the source vector.
-  LLVM_ABI CallInst *CreateOrReduce(Value *Src);
+  LLVM_ABI Value *CreateOrReduce(Value *Src);
 
   /// Create a vector int XOR reduction intrinsic of the source vector.
-  LLVM_ABI CallInst *CreateXorReduce(Value *Src);
+  LLVM_ABI Value *CreateXorReduce(Value *Src);
 
   /// Create a vector integer max reduction intrinsic of the source
   /// vector.
-  LLVM_ABI CallInst *CreateIntMaxReduce(Value *Src, bool IsSigned = false);
+  LLVM_ABI Value *CreateIntMaxReduce(Value *Src, bool IsSigned = false);
 
   /// Create a vector integer min reduction intrinsic of the source
   /// vector.
-  LLVM_ABI CallInst *CreateIntMinReduce(Value *Src, bool IsSigned = false);
+  LLVM_ABI Value *CreateIntMinReduce(Value *Src, bool IsSigned = false);
 
   /// Create a vector float max reduction intrinsic of the source
   /// vector.
-  LLVM_ABI CallInst *CreateFPMaxReduce(Value *Src);
+  LLVM_ABI Value *CreateFPMaxReduce(Value *Src);
 
   /// Create a vector float min reduction intrinsic of the source
   /// vector.
-  LLVM_ABI CallInst *CreateFPMinReduce(Value *Src);
+  LLVM_ABI Value *CreateFPMinReduce(Value *Src);
 
   /// Create a vector float maximum reduction intrinsic of the source
   /// vector. This variant follows the NaN and signed zero semantic of
   /// llvm.maximum intrinsic.
-  LLVM_ABI CallInst *CreateFPMaximumReduce(Value *Src);
+  LLVM_ABI Value *CreateFPMaximumReduce(Value *Src);
 
   /// Create a vector float minimum reduction intrinsic of the source
   /// vector. This variant follows the NaN and signed zero semantic of
   /// llvm.minimum intrinsic.
-  LLVM_ABI CallInst *CreateFPMinimumReduce(Value *Src);
+  LLVM_ABI Value *CreateFPMinimumReduce(Value *Src);
 
   /// Create a lifetime.start intrinsic.
   LLVM_ABI CallInst *CreateLifetimeStart(Value *Ptr);
@@ -1020,28 +1020,57 @@ class IRBuilderBase {
 
   /// Create a call to intrinsic \p ID with \p Args, mangled using
   /// \p OverloadTypes. If \p FMFSource is provided, copy fast-math-flags from
-  /// that instruction to the intrinsic.
-  LLVM_ABI CallInst *CreateIntrinsic(Intrinsic::ID ID,
-                                     ArrayRef<Type *> OverloadTypes,
-                                     ArrayRef<Value *> Args,
-                                     FMFSource FMFSource = {},
-                                     const Twine &Name = "",
-                                     ArrayRef<OperandBundleDef> OpBundles = {});
+  /// that instruction to the intrinsic. It is guaranteed not to fold.
+  LLVM_ABI CallInst *CreateIntrinsicWithoutFolding(
+      Intrinsic::ID ID, ArrayRef<Type *> OverloadTypes, ArrayRef<Value *> Args,
+      FMFSource FMFSource = {}, const Twine &Name = "",
+      ArrayRef<OperandBundleDef> OpBundles = {});
 
   /// Create a call to intrinsic \p ID with \p RetTy and \p Args. If
   /// \p FMFSource is provided, copy fast-math-flags from that instruction to
-  /// the intrinsic.
-  LLVM_ABI CallInst *CreateIntrinsic(Type *RetTy, Intrinsic::ID ID,
-                                     ArrayRef<Value *> Args,
-                                     FMFSource FMFSource = {},
-                                     const Twine &Name = "");
+  /// the intrinsic. It is guaranteed not to fold.
+  LLVM_ABI CallInst *CreateIntrinsicWithoutFolding(Type *RetTy,
+                                                   Intrinsic::ID ID,
+                                                   ArrayRef<Value *> Args,
+                                                   FMFSource FMFSource = {},
+                                                   const Twine &Name = "");
 
   /// Create a call to non-overloaded intrinsic \p ID with \p Args. If
   /// \p FMFSource is provided, copy fast-math-flags from that instruction to
-  /// the intrinsic.
-  CallInst *CreateIntrinsic(Intrinsic::ID ID, ArrayRef<Value *> Args,
-                            FMFSource FMFSource = {}, const Twine &Name = "") {
-    return CreateIntrinsic(ID, /*Types=*/{}, Args, FMFSource, Name);
+  /// the intrinsic. It is guaranteed not to fold.
+  LLVM_ABI CallInst *CreateIntrinsicWithoutFolding(Intrinsic::ID ID,
+                                                   ArrayRef<Value *> Args,
+                                                   FMFSource FMFSource = {},
+                                                   const Twine &Name = "") {
+    return CreateIntrinsicWithoutFolding(ID, /*Types=*/{}, Args, FMFSource,
+                                         Name);
+  }
+
+  /// Variant to create a possibly constant-folded intrinsic. An optional \p
+  /// SetFn is called if the intrinsic doesn't fold, and can be used to set
+  /// things like attributes and debug-loc.
+  LLVM_ABI Value *CreateIntrinsic(
+      Intrinsic::ID ID, ArrayRef<Type *> OverloadTypes, ArrayRef<Value *> Args,
+      FMFSource FMFSource = {}, const Twine &Name = "",
+      ArrayRef<OperandBundleDef> OpBundles = {},
+      std::function<void(CallInst *)> SetFn = [](CallInst *) {});
+
+  /// Variant to create a possibly constant-folded intrinsic. An optional \p
+  /// SetFn is called if the intrinsic doesn't fold, and can be used to set
+  /// things like attributes and debug-loc.
+  LLVM_ABI Value *CreateIntrinsic(
+      Type *RetTy, Intrinsic::ID ID, ArrayRef<Value *> Args,
+      FMFSource FMFSource = {}, const Twine &Name = "",
+      std::function<void(CallInst *)> SetFn = [](CallInst *) {});
+
+  /// Variant to create a possibly constant-folded intrinsic. An optional \p
+  /// SetFn is called if the intrinsic doesn't fold, and can be used to set
+  /// things like attributes and debug-loc.
+  LLVM_ABI Value *CreateIntrinsic(
+      Intrinsic::ID ID, ArrayRef<Value *> Args, FMFSource FMFSource = {},
+      const Twine &Name = "",
+      std::function<void(CallInst *)> SetFn = [](CallInst *) {}) {
+    return CreateIntrinsic(ID, /*Types=*/{}, Args, FMFSource, Name, {}, SetFn);
   }
 
   /// Create call to the fabs intrinsic.
@@ -1125,51 +1154,52 @@ class IRBuilderBase {
   }
 
   /// Create a call to the arithmetic_fence intrinsic.
-  CallInst *CreateArithmeticFence(Value *Val, Type *DstType,
-                                  const Twine &Name = "") {
+  Value *CreateArithmeticFence(Value *Val, Type *DstType,
+                               const Twine &Name = "") {
     return CreateIntrinsic(Intrinsic::arithmetic_fence, DstType, Val, nullptr,
                            Name);
   }
 
   /// Create a call to the vector.extract intrinsic.
-  CallInst *CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx,
-                                const Twine &Name = "") {
+  Value *CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx,
+                             const Twine &Name = "") {
     return CreateIntrinsic(Intrinsic::vector_extract,
                            {DstType, SrcVec->getType()}, {SrcVec, Idx}, nullptr,
                            Name);
   }
 
   /// Create a call to the vector.extract intrinsic.
-  CallInst *CreateExtractVector(Type *DstType, Value *SrcVec, uint64_t Idx,
-                                const Twine &Name = "") {
+  Value *CreateExtractVector(Type *DstType, Value *SrcVec, uint64_t Idx,
+                             const Twine &Name = "") {
     return CreateExtractVector(DstType, SrcVec, getInt64(Idx), Name);
   }
 
   /// Create a call to the vector.insert intrinsic.
-  CallInst *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
-                               Value *Idx, const Twine &Name = "") {
+  Value *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+                            Value *Idx, const Twine &Name = "") {
     return CreateIntrinsic(Intrinsic::vector_insert,
                            {DstType, SubVec->getType()}, {SrcVec, SubVec, Idx},
                            nullptr, Name);
   }
 
   /// Create a call to the vector.extract intrinsic.
-  CallInst *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
-                               uint64_t Idx, const Twine &Name = "") {
+  Value *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+                            uint64_t Idx, const Twine &Name = "") {
     return CreateInsertVector(DstType, SrcVec, SubVec, getInt64(Idx), Name);
   }
 
   /// Create a call to llvm.stacksave
   CallInst *CreateStackSave(const Twine &Name = "") {
     const DataLayout &DL = BB->getDataLayout();
-    return CreateIntrinsic(Intrinsic::stacksave, {DL.getAllocaPtrType(Context)},
-                           {}, nullptr, Name);
+    return CreateIntrinsicWithoutFolding(Intrinsic::stacksave,
+                                         {DL.getAllocaPtrType(Context)}, {},
+                                         nullptr, Name);
   }
 
   /// Create a call to llvm.stackrestore
   CallInst *CreateStackRestore(Value *Ptr, const Twine &Name = "") {
-    return CreateIntrinsic(Intrinsic::stackrestore, {Ptr->getType()}, {Ptr},
-                           nullptr, Name);
+    return CreateIntrinsicWithoutFolding(
+        Intrinsic::stackrestore, {Ptr->getType()}, {Ptr}, nullptr, Name);
   }
 
   /// Create a call to llvm.experimental_cttz_elts
@@ -1832,19 +1862,19 @@ class IRBuilderBase {
   /// the created intrinsic call according to \p Rounding and \p
   /// Except and it sets \p FPMathTag as the 'fpmath' metadata, using
   /// defaults if a value equals nullopt/null.
-  LLVM_ABI CallInst *CreateConstrainedFPIntrinsic(
+  LLVM_ABI Value *CreateConstrainedFPIntrinsic(
       Intrinsic::ID ID, ArrayRef<Type *> Types, ArrayRef<Value *> Args,
       FMFSource FMFSource, const Twine &Name, MDNode *FPMathTag = nullptr,
       std::optional<RoundingMode> Rounding = std::nullopt,
       std::optional<fp::ExceptionBehavior> Except = std::nullopt);
 
-  LLVM_ABI CallInst *CreateConstrainedFPBinOp(
+  LLVM_ABI Value *CreateConstrainedFPBinOp(
       Intrinsic::ID ID, Value *L, Value *R, FMFSource FMFSource = {},
       const Twine &Name = "", MDNode *FPMathTag = nullptr,
       std::optional<RoundingMode> Rounding = std::nullopt,
       std::optional<fp::ExceptionBehavior> Except = std::nullopt);
 
-  LLVM_ABI CallInst *CreateConstrainedFPUnroundedBinOp(
+  LLVM_ABI Value *CreateConstrainedFPUnroundedBinOp(
       Intrinsic::ID ID, Value *L, Value *R, FMFSource FMFSource = {},
       const Twine &Name = "", MDNode *FPMathTag = nullptr,
       std::optional<fp::ExceptionBehavior> Except = std::nullopt);
@@ -1916,8 +1946,8 @@ class IRBuilderBase {
   CallInst *CreateStructuredAlloca(Type *BaseType, const Twine &Name = "") {
     const DataLayout &DL = BB->getDataLayout();
     PointerType *PtrTy = DL.getAllocaPtrType(Context);
-    CallInst *Output =
-        CreateIntrinsic(Intrinsic::structured_alloca, {PtrTy}, {}, {}, Name);
+    auto *Output = CreateIntrinsicWithoutFolding(Intrinsic::structured_alloca,
+                                                 {PtrTy}, {}, {}, Name);
     Output->addRetAttr(
         Attribute::get(getContext(), Attribute::ElementType, BaseType));
     return Output;
@@ -2003,18 +2033,20 @@ class IRBuilderBase {
         new AtomicRMWInst(Op, Ptr, Val, *Align, Ordering, SSID, Elementwise));
   }
 
-  CallInst *CreateStructuredGEP(Type *BaseType, Value *PtrBase,
-                                ArrayRef<Value *> Indices,
-                                const Twine &Name = "") {
+  Value *CreateStructuredGEP(Type *BaseType, Value *PtrBase,
+                             ArrayRef<Value *> Indices,
+                             const Twine &Name = "") {
     SmallVector<Value *> Args;
     Args.push_back(PtrBase);
     llvm::append_range(Args, Indices);
 
-    CallInst *Output = CreateIntrinsic(Intrinsic::structured_gep,
-                                       {PtrBase->getType()}, Args, {}, Name);
-    Output->addParamAttr(
-        0, Attribute::get(getContext(), Attribute::ElementType, BaseType));
-    return Output;
+    return CreateIntrinsic(
+        Intrinsic::structured_gep, {PtrBase->getType()}, Args, {}, Name, {},
+        [&](CallInst *Output) {
+          Output->addParamAttr(
+              0,
+              Attribute::get(getContext(), Attribute::ElementType, BaseType));
+        });
   }
 
   Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
@@ -2352,7 +2384,7 @@ class IRBuilderBase {
     return CreateCast(CastOp, V, DestTy, Name, FPMathTag);
   }
 
-  LLVM_ABI CallInst *CreateConstrainedFPCast(
+  LLVM_ABI Value *CreateConstrainedFPCast(
       Intrinsic::ID ID, Value *V, Type *DestTy, FMFSource FMFSource = {},
       const Twine &Name = "", MDNode *FPMathTag = nullptr,
       std::optional<RoundingMode> Rounding = std::nullopt,
@@ -2537,7 +2569,7 @@ class IRBuilderBase {
                                    FMFSource FMFSource, bool IsSignaling);
 
 public:
-  LLVM_ABI CallInst *CreateConstrainedFPCmp(
+  LLVM_ABI Value *CreateConstrainedFPCmp(
       Intrinsic::ID ID, CmpInst::Predicate P, Value *L, Value *R,
       const Twine &Name = "",
       std::optional<fp::ExceptionBehavior> Except = std::nullopt);
diff --git a/llvm/lib/CodeGen/InlineAsmPrepare.cpp b/llvm/lib/CodeGen/InlineAsmPrepare.cpp
index 0bbf55c3d31e4..9587bf34649c1 100644
--- a/llvm/lib/CodeGen/InlineAsmPrepare.cpp
+++ b/llvm/lib/CodeGen/InlineAsmPrepare.cpp
@@ -188,7 +188,7 @@ static bool insertIntrinsicCalls(CallBrInst *CBR, DominatorTree &DT) {
       continue;
 
     Builder.SetInsertPoint(&*IndDest->begin());
-    CallInst *Intrinsic = Builder.CreateIntrinsic(
+    CallInst *Intrinsic = Builder.CreateIntrinsicWithoutFolding(
         CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
     SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
     updateSSA(DT, CBR, Intrinsic, SSAUpdate);
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index 152dcca209298..768f44141c432 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -556,7 +556,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
   if (FrameAlignment > StackAlignment) {
     // Re-align the base pointer according to the max requested alignment.
     IRB.SetInsertPoint(BasePointer->getNextNode());
-    BasePointer = IRB.CreateIntrinsic(
+    BasePointer = IRB.CreateIntrinsicWithoutFolding(
         StackPtrTy, Intrinsic::ptrmask,
         {BasePointer, ConstantInt::get(AddrTy, ~(FrameAlignment.value() - 1))});
   }
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 876d642b98fb0..b08859ee22a30 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -254,10 +254,11 @@ CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
   Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
   Type *Tys[] = {Ptr->getType(), Size->getType()};
 
-  CallInst *CI = CreateIntrinsic(Intrinsic::memset, Tys, Ops);
+  auto *CI = cast<MemSetInst>(
+      CreateIntrinsicWithoutFolding(Intrinsic::memset, Tys, Ops));
 
   if (Align)
-    cast<MemSetInst>(CI)->setDestAlignment(*Align);
+    CI->setDestAlignment(*Align);
   CI->setAAMetadata(AAInfo);
   return CI;
 }
@@ -269,10 +270,11 @@ CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign,
   Value *Ops[] = {Dst, Val, Size, getInt1(IsVolatile)};
   Type *Tys[] = {Dst->getType(), Size->getType()};
 
-  CallInst *CI = CreateIntrinsic(Intrinsic::memset_inline, Tys, Ops);
+  auto *CI = cast<MemSetInst>(
+      CreateIntrinsicWithoutFolding(Intrinsic::memset_inline, Tys, Ops));
 
   if (DstAlign)
-    cast<MemSetInst>(CI)->setDestAlignment(*DstAlign);
+    CI->setDestAlignment(*DstAlign);
   CI->setAAMetadata(AAInfo);
   return CI;
 }
@@ -284,10 +286,9 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
   Value *Ops[] = {Ptr, Val, Size, getInt32(ElementSize)};
   Type *Tys[] = {Ptr->getType(), Size->getType()};
 
-  CallInst *CI =
-      CreateIntrinsic(Intrinsic::memset_element_unordered_atomic, Tys, Ops);
-
-  cast<AnyMemSetInst>(CI)->setDestAlignment(Alignment);
+  auto *CI = cast<AnyMemSetInst>(CreateIntrinsicWithoutFolding(
+      Intrinsic::memset_element_unordered_atomic, Tys, Ops));
+  CI->setDestAlignment(Alignment);
   CI->setAAMetadata(AAInfo);
   return CI;
 }
@@ -303,15 +304,15 @@ CallInst *IRBuilderBase::CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst,
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
 
-  CallInst *CI = CreateIntrinsic(IntrID, Tys, Ops);
+  auto *MCI =
+      cast<MemTransferInst>(CreateIntrinsicWithoutFolding(IntrID, Tys, Ops));
 
-  auto* MCI = cast<MemTransferInst>(CI);
   if (DstAlign)
     MCI->setDestAlignment(*DstAlign);
   if (SrcAlign)
     MCI->setSourceAlignment(*SrcAlign);
   MCI->setAAMetadata(AAInfo);
-  return CI;
+  return MCI;
 }
 
 CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
@@ -324,15 +325,14 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
   Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
   Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
 
-  CallInst *CI =
-      CreateIntrinsic(Intrinsic::memcpy_element_unordered_atomic, Tys, Ops);
+  auto *AMCI = cast<AnyMemCpyInst>(CreateIntrinsicWithoutFolding(
+      Intrinsic::memcpy_element_unordered_atomic, Tys, Ops));
 
   // Set the alignment of the pointer args.
-  auto *AMCI = cast<AnyMemCpyInst>(CI);
   AMCI->setDestAlignment(DstAlign);
   AMCI->setSourceAlignment(SrcAlign);
   AMCI->setAAMetadata(AAInfo);
-  return CI;
+  return AMCI;
 }
 
 /// isConstantOne - Return true only if val is constant int 1
@@ -423,8 +423,8 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove(
   Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
   Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
 
-  CallInst *CI =
-      CreateIntrinsic(Intrinsic::memmove_element_unordered_atomic, Tys, Ops);
+  CallInst *CI = CreateIntrinsicWithoutFolding(
+      Intrinsic::memmove_element_unordered_atomic, Tys, Ops);
 
   // Set the alignment of the pointer args.
   CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign));
@@ -433,80 +433,82 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove(
   return CI;
 }
 
-CallInst *IRBuilderBase::getReductionIntrinsic(Intrinsic::ID ID, Value *Src) {
+Value *IRBuilderBase::getReductionIntrinsic(Intrinsic::ID ID, Value *Src) {
   Value *Ops[] = {Src};
   Type *Tys[] = { Src->getType() };
   return CreateIntrinsic(ID, Tys, Ops);
 }
 
-CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
+Value *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
   Value *Ops[] = {Acc, Src};
   return CreateIntrinsic(Intrinsic::vector_reduce_fadd, {Src->getType()}, Ops);
 }
 
-CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
+Value *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
   Value *Ops[] = {Acc, Src};
   return CreateIntrinsic(Intrinsic::vector_reduce_fmul, {Src->getType()}, Ops);
 }
 
-CallInst *IRBuilderBase::CreateAddReduce(Value *Src) {
+Value *IRBuilderBase::CreateAddReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_add, Src);
 }
 
-CallInst *IRBuilderBase::CreateMulReduce(Value *Src) {
+Value *IRBuilderBase::CreateMulReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_mul, Src);
 }
 
-CallInst *IRBuilderBase::CreateAndReduce(Value *Src) {
+Value *IRBuilderBase::CreateAndReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_and, Src);
 }
 
-CallInst *IRBuilderBase::CreateOrReduce(Value *Src) {
+Value *IRBuilderBase::CreateOrReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_or, Src);
 }
 
-CallInst *IRBuilderBase::CreateXorReduce(Value *Src) {
+Value *IRBuilderBase::CreateXorReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_xor, Src);
 }
 
-CallInst *IRBuilderBase::CreateIntMaxReduce(Value *Src, bool IsSigned) {
+Value *IRBuilderBase::CreateIntMaxReduce(Value *Src, bool IsSigned) {
   auto ID =
       IsSigned ? Intrinsic::vector_reduce_smax : Intrinsic::vector_reduce_umax;
   return getReductionIntrinsic(ID, Src);
 }
 
-CallInst *IRBuilderBase::CreateIntMinReduce(Value *Src, bool IsSigned) {
+Value *IRBuilderBase::CreateIntMinReduce(Value *Src, bool IsSigned) {
   auto ID =
       IsSigned ? Intrinsic::vector_reduce_smin : Intrinsic::vector_reduce_umin;
   return getReductionIntrinsic(ID, Src);
 }
 
-CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src) {
+Value *IRBuilderBase::CreateFPMaxReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_fmax, Src);
 }
 
-CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src) {
+Value *IRBuilderBase::CreateFPMinReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_fmin, Src);
 }
 
-CallInst *IRBuilderBase::CreateFPMaximumReduce(Value *Src) {
+Value *IRBuilderBase::CreateFPMaximumReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_fmaximum, Src);
 }
 
-CallInst *IRBuilderBase::CreateFPMinimumReduce(Value *Src) {
+Value *IRBuilderBase::CreateFPMinimumReduce(Value *Src) {
   return getReductionIntrinsic(Intrinsic::vector_reduce_fminimum, Src);
 }
 
 CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr) {
   assert(isa<PointerType>(Ptr->getType()) &&
          "lifetime.start only applies to pointers.");
-  return CreateIntrinsic(Intrinsic::lifetime_start, {Ptr->getType()}, {Ptr});
+  return CreateIntrinsicWithoutFolding(Intrinsic::lifetime_start,
+                                       {Ptr->getType()}, {Ptr});
 }
 
 CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr) {
   assert(isa<PointerType>(Ptr->getType()) &&
          "lifetime.end only applies to pointers.");
-  return CreateIntrinsic(Intrinsic::lifetime_end, {Ptr->getType()}, {Ptr});
+  return CreateIntrinsicWithoutFolding(Intrinsic::lifetime_end,
+                                       {Ptr->getType()}, {Ptr});
 }
 
 CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
@@ -522,7 +524,8 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
   Value *Ops[] = {Size, Ptr};
   // Fill in the single overloaded type: memory object type.
   Type *ObjectPtr[1] = {Ptr->getType()};
-  return CreateIntrinsic(Intrinsic::invariant_start, ObjectPtr, Ops);
+  return CreateIntrinsicWithoutFolding(Intrinsic::invariant_start, ObjectPtr,
+                                       Ops);
 }
 
 static MaybeAlign getAlign(Value *Ptr) {
@@ -536,8 +539,8 @@ static MaybeAlign getAlign(Value *Ptr) {
 CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) {
   assert(isa<GlobalValue>(Ptr) && cast<GlobalValue>(Ptr)->isThreadLocal() &&
          "threadlocal_address only applies to thread local variables.");
-  CallInst *CI = CreateIntrinsic(llvm::Intrinsic::threadlocal_address,
-                                 {Ptr->getType()}, {Ptr});
+  CallInst *CI = CreateIntrinsicWithoutFolding(
+      llvm::Intrinsic::threadlocal_address, {Ptr->getType()}, {Ptr});
   if (MaybeAlign A = getAlign(Ptr)) {
     CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), *A));
     CI->addRetAttr(Attribute::getWithAlignment(CI->getContext(), *A));
@@ -548,19 +551,21 @@ CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) {
 CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
   assert(Cond->getType() == getInt1Ty() &&
          "an assumption condition must be of type i1");
-  return CreateIntrinsic(Intrinsic::assume, /*OverloadTypes=*/{}, {Cond});
+  return CreateIntrinsicWithoutFolding(Intrinsic::assume, /*OverloadTypes=*/{},
+                                       {Cond});
 }
 
 CallInst *
 IRBuilderBase::CreateAssumption(ArrayRef<OperandBundleDef> OpBundles) {
   Value *Args[] = {ConstantInt::getTrue(getContext())};
-  return CreateIntrinsic(Intrinsic::assume, /*OverloadTypes=*/{}, Args,
-                         /*FMFSource=*/nullptr, /*Name=*/"", OpBundles);
+  return CreateIntrinsicWithoutFolding(
+      Intrinsic::assume, /*OverloadTypes=*/{}, Args,
+      /*FMFSource=*/nullptr, /*Name=*/"", OpBundles);
 }
 
 Instruction *IRBuilderBase::CreateNoAliasScopeDeclaration(Value *Scope) {
-  return CreateIntrinsic(Intrinsic::experimental_noalias_scope_decl, {},
-                         {Scope});
+  return CreateIntrinsicWithoutFolding(
+      Intrinsic::experimental_noalias_scope_decl, {}, {Scope});
 }
 
 /// Create a call to a Masked Load intrinsic.
@@ -615,7 +620,7 @@ CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
                                                ArrayRef<Value *> Ops,
                                                ArrayRef<Type *> OverloadedTypes,
                                                const Twine &Name) {
-  return CreateIntrinsic(Id, OverloadedTypes, Ops, {}, Name);
+  return CreateIntrinsicWithoutFolding(Id, OverloadedTypes, Ops, {}, Name);
 }
 
 /// Create a call to a Masked Gather intrinsic.
@@ -884,7 +889,7 @@ CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint,
   Type *Types[] = {ResultType};
 
   Value *Args[] = {Statepoint};
-  return CreateIntrinsic(ID, Types, Args, {}, Name);
+  return CreateIntrinsicWithoutFolding(ID, Types, Args, {}, Name);
 }
 
 CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
@@ -893,22 +898,24 @@ CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
   Type *Types[] = {ResultType};
 
   Value *Args[] = {Statepoint, getInt32(BaseOffset), getInt32(DerivedOffset)};
-  return CreateIntrinsic(Intrinsic::experimental_gc_relocate, Types, Args, {},
-                         Name);
+  return CreateIntrinsicWithoutFolding(Intrinsic::experimental_gc_relocate,
+                                       Types, Args, {}, Name);
 }
 
 CallInst *IRBuilderBase::CreateGCGetPointerBase(Value *DerivedPtr,
                                                 const Twine &Name) {
   Type *PtrTy = DerivedPtr->getType();
-  return CreateIntrinsic(Intrinsic::experimental_gc_get_pointer_base,
-                         {PtrTy, PtrTy}, {DerivedPtr}, {}, Name);
+  return CreateIntrinsicWithoutFolding(
+      Intrinsic::experimental_gc_get_pointer_base, {PtrTy, PtrTy}, {DerivedPtr},
+      {}, Name);
 }
 
 CallInst *IRBuilderBase::CreateGCGetPointerOffset(Value *DerivedPtr,
                                                   const Twine &Name) {
   Type *PtrTy = DerivedPtr->getType();
-  return CreateIntrinsic(Intrinsic::experimental_gc_get_pointer_offset, {PtrTy},
-                         {DerivedPtr}, {}, Name);
+  return CreateIntrinsicWithoutFolding(
+      Intrinsic::experimental_gc_get_pointer_offset, {PtrTy}, {DerivedPtr}, {},
+      Name);
 }
 
 Value *IRBuilderBase::CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op,
@@ -933,27 +940,51 @@ Value *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS,
   return createCallHelper(Fn, {LHS, RHS}, Name, FMFSource);
 }
 
-CallInst *IRBuilderBase::CreateIntrinsic(Intrinsic::ID ID,
-                                         ArrayRef<Type *> OverloadTypes,
-                                         ArrayRef<Value *> Args,
-                                         FMFSource FMFSource, const Twine &Name,
-                                         ArrayRef<OperandBundleDef> OpBundles) {
+CallInst *IRBuilderBase::CreateIntrinsicWithoutFolding(
+    Intrinsic::ID ID, ArrayRef<Type *> OverloadTypes, ArrayRef<Value *> Args,
+    FMFSource FMFSource, const Twine &Name,
+    ArrayRef<OperandBundleDef> OpBundles) {
   Module *M = BB->getModule();
   Function *Fn = Intrinsic::getOrInsertDeclaration(M, ID, OverloadTypes);
   return createCallHelper(Fn, Args, Name, FMFSource, OpBundles);
 }
 
-CallInst *IRBuilderBase::CreateIntrinsic(Type *RetTy, Intrinsic::ID ID,
-                                         ArrayRef<Value *> Args,
-                                         FMFSource FMFSource,
-                                         const Twine &Name) {
+CallInst *IRBuilderBase::CreateIntrinsicWithoutFolding(Type *RetTy,
+                                                       Intrinsic::ID ID,
+                                                       ArrayRef<Value *> Args,
+                                                       FMFSource FMFSource,
+                                                       const Twine &Name) {
   Module *M = BB->getModule();
   SmallVector<Type *> ArgTys = llvm::map_to_vector(Args, &Value::getType);
   Function *Fn = Intrinsic::getOrInsertDeclaration(M, ID, RetTy, ArgTys);
   return createCallHelper(Fn, Args, Name, FMFSource);
 }
 
-CallInst *IRBuilderBase::CreateConstrainedFPBinOp(
+Value *IRBuilderBase::CreateIntrinsic(Intrinsic::ID ID,
+                                      ArrayRef<Type *> OverloadTypes,
+                                      ArrayRef<Value *> Args,
+                                      FMFSource FMFSource, const Twine &Name,
+                                      ArrayRef<OperandBundleDef> OpBundles,
+                                      std::function<void(CallInst *)> SetFn) {
+  // TODO: Try to constant-fold.
+  CallInst *CI = CreateIntrinsicWithoutFolding(ID, OverloadTypes, Args,
+                                               FMFSource, Name, OpBundles);
+  SetFn(CI);
+  return CI;
+}
+
+Value *IRBuilderBase::CreateIntrinsic(Type *RetTy, Intrinsic::ID ID,
+                                      ArrayRef<Value *> Args,
+                                      FMFSource FMFSource, const Twine &Name,
+                                      std::function<void(CallInst *)> SetFn) {
+  // TODO: Try to constant-fold.
+  CallInst *CI =
+      CreateIntrinsicWithoutFolding(RetTy, ID, Args, FMFSource, Name);
+  SetFn(CI);
+  return CI;
+}
+
+Value *IRBuilderBase::CreateConstrainedFPBinOp(
     Intrinsic::ID ID, Value *L, Value *R, FMFSource FMFSource,
     const Twine &Name, MDNode *FPMathTag, std::optional<RoundingMode> Rounding,
     std::optional<fp::ExceptionBehavior> Except) {
@@ -961,15 +992,14 @@ CallInst *IRBuilderBase::CreateConstrainedFPBinOp(
   Value *ExceptV = getConstrainedFPExcept(Except);
 
   FastMathFlags UseFMF = FMFSource.get(FMF);
-
-  CallInst *C = CreateIntrinsic(ID, {L->getType()},
-                                {L, R, RoundingV, ExceptV}, nullptr, Name);
-  setConstrainedFPCallAttr(C);
-  setFPAttrs(C, FPMathTag, UseFMF);
-  return C;
+  return CreateIntrinsic(ID, {L->getType()}, {L, R, RoundingV, ExceptV},
+                         nullptr, Name, {}, [&](CallInst *C) {
+                           setConstrainedFPCallAttr(C);
+                           setFPAttrs(C, FPMathTag, UseFMF);
+                         });
 }
 
-CallInst *IRBuilderBase::CreateConstrainedFPIntrinsic(
+Value *IRBuilderBase::CreateConstrainedFPIntrinsic(
     Intrinsic::ID ID, ArrayRef<Type *> Types, ArrayRef<Value *> Args,
     FMFSource FMFSource, const Twine &Name, MDNode *FPMathTag,
     std::optional<RoundingMode> Rounding,
@@ -982,26 +1012,25 @@ CallInst *IRBuilderBase::CreateConstrainedFPIntrinsic(
   llvm::SmallVector<Value *, 5> ExtArgs(Args);
   ExtArgs.push_back(RoundingV);
   ExtArgs.push_back(ExceptV);
-
-  CallInst *C = CreateIntrinsic(ID, Types, ExtArgs, nullptr, Name);
-  setConstrainedFPCallAttr(C);
-  setFPAttrs(C, FPMathTag, UseFMF);
-  return C;
+  return CreateIntrinsic(ID, Types, ExtArgs, nullptr, Name, {},
+                         [&](CallInst *C) {
+                           setConstrainedFPCallAttr(C);
+                           setFPAttrs(C, FPMathTag, UseFMF);
+                         });
 }
 
-CallInst *IRBuilderBase::CreateConstrainedFPUnroundedBinOp(
+Value *IRBuilderBase::CreateConstrainedFPUnroundedBinOp(
     Intrinsic::ID ID, Value *L, Value *R, FMFSource FMFSource,
     const Twine &Name, MDNode *FPMathTag,
     std::optional<fp::ExceptionBehavior> Except) {
   Value *ExceptV = getConstrainedFPExcept(Except);
 
   FastMathFlags UseFMF = FMFSource.get(FMF);
-
-  CallInst *C =
-      CreateIntrinsic(ID, {L->getType()}, {L, R, ExceptV}, nullptr, Name);
-  setConstrainedFPCallAttr(C);
-  setFPAttrs(C, FPMathTag, UseFMF);
-  return C;
+  return CreateIntrinsic(ID, {L->getType()}, {L, R, ExceptV}, nullptr, Name, {},
+                         [&](CallInst *C) {
+                           setConstrainedFPCallAttr(C);
+                           setFPAttrs(C, FPMathTag, UseFMF);
+                         });
 }
 
 Value *IRBuilderBase::CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
@@ -1019,27 +1048,28 @@ Value *IRBuilderBase::CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
   llvm_unreachable("Unexpected opcode!");
 }
 
-CallInst *IRBuilderBase::CreateConstrainedFPCast(
+Value *IRBuilderBase::CreateConstrainedFPCast(
     Intrinsic::ID ID, Value *V, Type *DestTy, FMFSource FMFSource,
     const Twine &Name, MDNode *FPMathTag, std::optional<RoundingMode> Rounding,
     std::optional<fp::ExceptionBehavior> Except) {
   Value *ExceptV = getConstrainedFPExcept(Except);
 
   FastMathFlags UseFMF = FMFSource.get(FMF);
+  auto SetFn = [&](CallInst *C) {
+    setConstrainedFPCallAttr(C);
+
+    if (isa<FPMathOperator>(C))
+      setFPAttrs(C, FPMathTag, UseFMF);
+  };
 
-  CallInst *C;
+  Value *C;
   if (Intrinsic::hasConstrainedFPRoundingModeOperand(ID)) {
     Value *RoundingV = getConstrainedFPRounding(Rounding);
     C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, RoundingV, ExceptV},
-                        nullptr, Name);
+                        nullptr, Name, {}, SetFn);
   } else
-    C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, ExceptV}, nullptr,
-                        Name);
-
-  setConstrainedFPCallAttr(C);
-
-  if (isa<FPMathOperator>(C))
-    setFPAttrs(C, FPMathTag, UseFMF);
+    C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, ExceptV}, nullptr, Name,
+                        {}, SetFn);
   return C;
 }
 
@@ -1060,16 +1090,15 @@ Value *IRBuilderBase::CreateFCmpHelper(CmpInst::Predicate P, Value *LHS,
       Name);
 }
 
-CallInst *IRBuilderBase::CreateConstrainedFPCmp(
+Value *IRBuilderBase::CreateConstrainedFPCmp(
     Intrinsic::ID ID, CmpInst::Predicate P, Value *L, Value *R,
     const Twine &Name, std::optional<fp::ExceptionBehavior> Except) {
   Value *PredicateV = getConstrainedFPPredicate(P);
   Value *ExceptV = getConstrainedFPExcept(Except);
 
-  CallInst *C = CreateIntrinsic(ID, {L->getType()},
-                                {L, R, PredicateV, ExceptV}, nullptr, Name);
-  setConstrainedFPCallAttr(C);
-  return C;
+  return CreateIntrinsic(ID, {L->getType()}, {L, R, PredicateV, ExceptV},
+                         nullptr, Name, {},
+                         [&](CallInst *C) { setConstrainedFPCallAttr(C); });
 }
 
 CallInst *IRBuilderBase::CreateConstrainedFPCall(
@@ -1300,9 +1329,9 @@ Value *IRBuilderBase::CreatePreserveArrayAccessIndex(Type *ElTy, Value *Base,
   Type *ResultType = GetElementPtrInst::getGEPReturnType(Base, IdxList);
 
   Value *DimV = getInt32(Dimension);
-  CallInst *Fn =
-      CreateIntrinsic(Intrinsic::preserve_array_access_index,
-                      {ResultType, BaseType}, {Base, DimV, LastIndexV});
+  CallInst *Fn = CreateIntrinsicWithoutFolding(
+      Intrinsic::preserve_array_access_index, {ResultType, BaseType},
+      {Base, DimV, LastIndexV});
   Fn->addParamAttr(
       0, Attribute::get(Fn->getContext(), Attribute::ElementType, ElTy));
   if (DbgInfo)
@@ -1318,8 +1347,9 @@ Value *IRBuilderBase::CreatePreserveUnionAccessIndex(
   auto *BaseType = Base->getType();
 
   Value *DIIndex = getInt32(FieldIndex);
-  CallInst *Fn = CreateIntrinsic(Intrinsic::preserve_union_access_index,
-                                 {BaseType, BaseType}, {Base, DIIndex});
+  CallInst *Fn =
+      CreateIntrinsicWithoutFolding(Intrinsic::preserve_union_access_index,
+                                    {BaseType, BaseType}, {Base, DIIndex});
   if (DbgInfo)
     Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
 
@@ -1339,9 +1369,9 @@ Value *IRBuilderBase::CreatePreserveStructAccessIndex(
       GetElementPtrInst::getGEPReturnType(Base, {Zero, GEPIndex});
 
   Value *DIIndex = getInt32(FieldIndex);
-  CallInst *Fn =
-      CreateIntrinsic(Intrinsic::preserve_struct_access_index,
-                      {ResultType, BaseType}, {Base, GEPIndex, DIIndex});
+  CallInst *Fn = CreateIntrinsicWithoutFolding(
+      Intrinsic::preserve_struct_access_index, {ResultType, BaseType},
+      {Base, GEPIndex, DIIndex});
   Fn->addParamAttr(
       0, Attribute::get(Fn->getContext(), Attribute::ElementType, ElTy));
   if (DbgInfo)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 57a2d73e00f57..f3e67c4fef9df 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -31352,7 +31352,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
 
   const DataLayout &DL = M->getDataLayout();
   IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
-  CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
+  CallInst *CI = Builder.CreateIntrinsicWithoutFolding(Int, Tys, Addr);
   CI->addParamAttr(0, Attribute::get(Builder.getContext(),
                                      Attribute::ElementType, IntEltTy));
   Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 7ef18fa18255d..c60d0e3b826b6 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -477,9 +477,9 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer(
   assert(PrologueBB);
 
   IRBuilder<> IRB(&PrologueBB->front());
-  Instruction *Base =
-      IRB.CreateIntrinsic(Intrinsic::aarch64_irg_sp, {},
-                          {Constant::getNullValue(IRB.getInt64Ty())});
+  Instruction *Base = IRB.CreateIntrinsicWithoutFolding(
+      Intrinsic::aarch64_irg_sp, {},
+      {Constant::getNullValue(IRB.getInt64Ty())});
   Base->setName("basetag");
   const Triple &TargetTriple = M.getTargetTriple();
   // This ABI will make it into Android API level 35.
@@ -583,10 +583,10 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
     NextTag = (NextTag + 1) % 16;
     // Replace alloca with tagp(alloca).
     IRBuilder<> IRB(Info.AI->getNextNode());
-    Instruction *TagPCall =
-        IRB.CreateIntrinsic(Intrinsic::aarch64_tagp, {Info.AI->getType()},
-                            {Constant::getNullValue(Info.AI->getType()), Base,
-                             ConstantInt::get(IRB.getInt64Ty(), Tag)});
+    Instruction *TagPCall = IRB.CreateIntrinsicWithoutFolding(
+        Intrinsic::aarch64_tagp, {Info.AI->getType()},
+        {Constant::getNullValue(Info.AI->getType()), Base,
+         ConstantInt::get(IRB.getInt64Ty(), Tag)});
     if (Info.AI->hasName())
       TagPCall->setName(Info.AI->getName() + ".tag");
     // Does not replace metadata, so we don't have to handle DbgVariableRecords.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6a5a31fcfeef9..8c92251fcb501 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2434,7 +2434,7 @@ instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,
     FMFSource = &II;
   }
 
-  CallInst *Res;
+  Value *Res;
   if (MergeIntoAddendOp)
     Res = IC.Builder.CreateIntrinsic(FuseOpc, {II.getType()},
                                      {P, AddendOp, MulOp0, MulOp1}, FMFSource);
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index cf4f59bd6c304..0dfb2a8c0b991 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -160,7 +160,7 @@ bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
 
   auto *MostEncompassingPTrueVTy =
       cast<VectorType>(MostEncompassingPTrue->getType());
-  auto *ConvertToSVBool = Builder.CreateIntrinsic(
+  auto *ConvertToSVBool = Builder.CreateIntrinsicWithoutFolding(
       Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
       {MostEncompassingPTrue});
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp
index 9af3b05ff01fa..fddc8a3da0b53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp
@@ -73,7 +73,7 @@ static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
 
   Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
   IRB.SetInsertPoint(Trm);
-  return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {});
+  return IRB.CreateIntrinsicWithoutFolding(Intrinsic::amdgcn_unreachable, {});
 }
 
 static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index cf4c2bbc78359..5c720718f9ab3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -691,8 +691,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
   // We need to know how many lanes are active within the wavefront, and we do
   // this by doing a ballot of active lanes.
   Type *const WaveTy = B.getIntNTy(ST.getWavefrontSize());
-  CallInst *const Ballot =
-      B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());
+  CallInst *const Ballot = B.CreateIntrinsicWithoutFolding(
+      Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());
 
   // We need to know how many lanes are active within the wavefront that are
   // below us. If we counted each lane linearly starting from 0, a lane is
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 394c2326a64ef..68fc1107a18ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -2324,7 +2324,8 @@ INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
 
 /// Create a workitem.id.x intrinsic call with range metadata.
 CallInst *AMDGPUCodeGenPrepareImpl::createWorkitemIdX(IRBuilder<> &B) const {
-  CallInst *Tid = B.CreateIntrinsic(Intrinsic::amdgcn_workitem_id_x, {});
+  CallInst *Tid =
+      B.CreateIntrinsicWithoutFolding(Intrinsic::amdgcn_workitem_id_x, {});
   ST.makeLIDRangeMetadata(Tid);
   return Tid;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
index dd6858a15749e..04ede2b7dfdd5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
@@ -240,7 +240,8 @@ bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
       Args[ImageDimIntr->DMaskIndex] =
           ConstantInt::get(DMask->getType(), NewMaskVal);
       Args[FragIdIndex] = ConstantInt::get(FragId->getType(), NewFragIdVal);
-      CallInst *NewCall = B.CreateIntrinsic(NewIntrinID, OverloadTys, Args);
+      CallInst *NewCall =
+          B.CreateIntrinsicWithoutFolding(NewIntrinID, OverloadTys, Args);
       LLVM_DEBUG(dbgs() << "Optimize: " << *NewCall << "\n");
 
       NewCalls.push_back(NewCall);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index b17ccef685b21..3c22ce96ff1f6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -136,7 +136,8 @@ static std::optional<Instruction *> modifyIntrinsicCall(
   // Modify arguments and types
   Func(Args, OverloadTys);
 
-  CallInst *NewCall = IC.Builder.CreateIntrinsic(NewIntr, OverloadTys, Args);
+  CallInst *NewCall =
+      IC.Builder.CreateIntrinsicWithoutFolding(NewIntr, OverloadTys, Args);
   NewCall->takeName(&OldIntr);
   NewCall->copyMetadata(OldIntr);
   if (isa<FPMathOperator>(NewCall))
@@ -1773,7 +1774,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
       Value *Args[] = {SrcLHS, SrcRHS,
                        ConstantInt::get(CC->getType(), SrcPred)};
-      CallInst *NewCall = IC.Builder.CreateIntrinsic(
+      Value *NewCall = IC.Builder.CreateIntrinsic(
           NewIID, {II.getType(), SrcLHS->getType()}, Args);
       NewCall->takeName(&II);
       return IC.replaceInstUsesWith(II, NewCall);
@@ -2211,7 +2212,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     Args[0] = Src0;
     Args[1] = Src1;
 
-    CallInst *NewII = IC.Builder.CreateIntrinsic(
+    Value *NewII = IC.Builder.CreateIntrinsic(
         IID, {Src0->getType(), Src1->getType()}, Args, &II);
     NewII->takeName(&II);
     return IC.replaceInstUsesWith(II, NewII);
@@ -2253,7 +2254,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     Args[1] = Src0;
     Args[3] = Src1;
 
-    CallInst *NewII = IC.Builder.CreateIntrinsic(
+    Value *NewII = IC.Builder.CreateIntrinsic(
         IID, {II.getArgOperand(5)->getType(), Src0->getType(), Src1->getType()},
         Args, &II);
     NewII->takeName(&II);
@@ -2403,8 +2404,8 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
       Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
   }
 
-  CallInst *NewCall =
-      IC.Builder.CreateIntrinsic(II.getIntrinsicID(), OverloadTys, Args);
+  CallInst *NewCall = IC.Builder.CreateIntrinsicWithoutFolding(
+      II.getIntrinsicID(), OverloadTys, Args);
   NewCall->takeName(&II);
   NewCall->copyMetadata(II);
   AttributeList OldAttrList = II.getAttributes();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 300c2f8782ae1..14c069f025683 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1802,7 +1802,7 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
     }
   }
 
-  auto *Call = IRB.CreateIntrinsic(IID, Ty, Args);
+  auto *Call = IRB.CreateIntrinsicWithoutFolding(IID, Ty, Args);
   copyMetadata(Call, I);
   setAlign(Call, Alignment, Arg ? 1 : 0);
   Call->takeName(I);
@@ -1869,10 +1869,10 @@ PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
     Aux |= AMDGPU::CPol::SLC;
   if (AI.isVolatile())
     Aux |= AMDGPU::CPol::VOLATILE;
-  auto *Call =
-      IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
-                          {AI.getNewValOperand(), AI.getCompareOperand(), Rsrc,
-                           Off, IRB.getInt32(0), IRB.getInt32(Aux)});
+  auto *Call = IRB.CreateIntrinsicWithoutFolding(
+      Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
+      {AI.getNewValOperand(), AI.getCompareOperand(), Rsrc, Off,
+       IRB.getInt32(0), IRB.getInt32(Aux)});
   copyMetadata(Call, &AI);
   setAlign(Call, AI.getAlign(), 2);
   Call->takeName(&AI);
@@ -2324,7 +2324,7 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
         IID == Intrinsic::amdgcn_load_to_lds
             ? Intrinsic::amdgcn_raw_ptr_buffer_load_lds
             : Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds;
-    Instruction *NewLoad = IRB.CreateIntrinsic(
+    Value *NewLoad = IRB.CreateIntrinsic(
         NewIntr, {}, {Rsrc, LDSPtr, LoadSize, Off, SOffset, ImmOff, Aux});
     copyMetadata(NewLoad, &I);
     SplitUsers.insert(&I);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index d9b018e3cefa0..2b80cd6096597 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -105,17 +105,18 @@ bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &I) {
     // The default cluster barrier expects one signal per workgroup. So we need
     // a workgroup barrier first.
     if (IsSingleWaveWG) {
-      B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_wave_barrier, {})
+      B.CreateIntrinsicWithoutFolding(B.getVoidTy(),
+                                      Intrinsic::amdgcn_wave_barrier, {})
           ->copyMetadata(I);
     } else {
       Value *BarrierID_32 = B.getInt32(AMDGPU::Barrier::WORKGROUP);
       Value *BarrierID_16 = B.getInt16(AMDGPU::Barrier::WORKGROUP);
-      CallInst *IsFirst = B.CreateIntrinsic(
+      auto *IsFirst = B.CreateIntrinsicWithoutFolding(
           B.getInt1Ty(), Intrinsic::amdgcn_s_barrier_signal_isfirst,
           {BarrierID_32});
       IsFirst->copyMetadata(I);
-      B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,
-                        {BarrierID_16})
+      B.CreateIntrinsicWithoutFolding(
+           B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait, {BarrierID_16})
           ->copyMetadata(I);
 
       Instruction *ThenTerm =
@@ -127,13 +128,13 @@ bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &I) {
     // barrier in all waves.
     Value *BarrierID_32 = B.getInt32(AMDGPU::Barrier::CLUSTER);
     Value *BarrierID_16 = B.getInt16(AMDGPU::Barrier::CLUSTER);
-    B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal,
-                      {BarrierID_32})
+    B.CreateIntrinsicWithoutFolding(
+         B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal, {BarrierID_32})
         ->copyMetadata(I);
 
     B.SetInsertPoint(&I);
-    B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,
-                      {BarrierID_16})
+    B.CreateIntrinsicWithoutFolding(
+         B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait, {BarrierID_16})
         ->copyMetadata(I);
 
     I.eraseFromParent();
@@ -160,7 +161,8 @@ bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &I) {
     // Down-grade waits, remove split signals.
     if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier ||
         I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait) {
-      B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_wave_barrier, {})
+      B.CreateIntrinsicWithoutFolding(B.getVoidTy(),
+                                      Intrinsic::amdgcn_wave_barrier, {})
           ->copyMetadata(I);
     } else if (I.getIntrinsicID() ==
                Intrinsic::amdgcn_s_barrier_signal_isfirst) {
@@ -176,11 +178,11 @@ bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &I) {
     // Lower to split barriers.
     Value *BarrierID_32 = B.getInt32(AMDGPU::Barrier::WORKGROUP);
     Value *BarrierID_16 = B.getInt16(AMDGPU::Barrier::WORKGROUP);
-    B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal,
-                      {BarrierID_32})
+    B.CreateIntrinsicWithoutFolding(
+         B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal, {BarrierID_32})
         ->copyMetadata(I);
-    B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,
-                      {BarrierID_16})
+    B.CreateIntrinsicWithoutFolding(
+         B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait, {BarrierID_16})
         ->copyMetadata(I);
     I.eraseFromParent();
     return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index a13011eca5a60..b6168a52bb2ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -221,9 +221,9 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM,
   if (TotalKernArgSize == 0)
     return false;
 
-  CallInst *KernArgSegment =
-      Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {},
-                              nullptr, F.getName() + ".kernarg.segment");
+  CallInst *KernArgSegment = Builder.CreateIntrinsicWithoutFolding(
+      Intrinsic::amdgcn_kernarg_segment_ptr, {}, nullptr,
+      F.getName() + ".kernarg.segment");
   KernArgSegment->addRetAttr(Attribute::NonNull);
   KernArgSegment->addRetAttr(
       Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 0981f6becffc4..95e06dc8295d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -1203,10 +1203,10 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
 
   if (!IsAMDHSA) {
-    CallInst *LocalSizeY =
-        Builder.CreateIntrinsic(Intrinsic::r600_read_local_size_y, {});
-    CallInst *LocalSizeZ =
-        Builder.CreateIntrinsic(Intrinsic::r600_read_local_size_z, {});
+    CallInst *LocalSizeY = Builder.CreateIntrinsicWithoutFolding(
+        Intrinsic::r600_read_local_size_y, {});
+    CallInst *LocalSizeZ = Builder.CreateIntrinsicWithoutFolding(
+        Intrinsic::r600_read_local_size_z, {});
 
     ST.makeLIDRangeMetadata(LocalSizeY);
     ST.makeLIDRangeMetadata(LocalSizeZ);
@@ -1249,7 +1249,7 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
   //   } hsa_kernel_dispatch_packet_t
   //
   CallInst *DispatchPtr =
-      Builder.CreateIntrinsic(Intrinsic::amdgcn_dispatch_ptr, {});
+      Builder.CreateIntrinsicWithoutFolding(Intrinsic::amdgcn_dispatch_ptr, {});
   DispatchPtr->addRetAttr(Attribute::NoAlias);
   DispatchPtr->addRetAttr(Attribute::NonNull);
   F.removeFnAttr("amdgpu-no-dispatch-ptr");
@@ -1760,7 +1760,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(
     case Intrinsic::objectsize: {
       Value *Src = Intr->getOperand(0);
 
-      CallInst *NewCall = Builder.CreateIntrinsic(
+      Value *NewCall = Builder.CreateIntrinsic(
           Intrinsic::objectsize,
           {Intr->getType(), PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS)},
           {Src, Intr->getOperand(1), Intr->getOperand(2), Intr->getOperand(3)});
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a6dc58e6da263..d3fae8573858d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -20762,8 +20762,8 @@ void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
 
   Value *LoadedShared = nullptr;
   if (FullFlatEmulation) {
-    CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared,
-                                                 {Addr}, nullptr, "is.shared");
+    Value *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared,
+                                              {Addr}, nullptr, "is.shared");
     Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB);
     Builder.SetInsertPoint(SharedBB);
     Value *CastToLocal = Builder.CreateAddrSpaceCast(
@@ -20778,8 +20778,8 @@ void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
     Builder.SetInsertPoint(CheckPrivateBB);
   }
 
-  CallInst *IsPrivate = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_private,
-                                                {Addr}, nullptr, "is.private");
+  Value *IsPrivate = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_private,
+                                             {Addr}, nullptr, "is.private");
   Builder.CreateCondBr(IsPrivate, PrivateBB, GlobalBB);
 
   Builder.SetInsertPoint(PrivateBB);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e08a4f5ebda08..248f5b3f9f083 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21403,17 +21403,16 @@ Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
       Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
                         Builder.getInt32(0), Builder.getInt32(7),
                         Builder.getInt32(10), Builder.getInt32(5)};
-      return Builder.CreateIntrinsic(Intrinsic::arm_mcr, args);
-    } else {
-      // Instead of using barriers, atomic accesses on these subtargets use
-      // libcalls.
-      llvm_unreachable("makeDMB on a target so old that it has no barriers");
+      return Builder.CreateIntrinsicWithoutFolding(Intrinsic::arm_mcr, args);
     }
+    // Instead of using barriers, atomic accesses on these subtargets use
+    // libcalls.
+    llvm_unreachable("makeDMB on a target so old that it has no barriers");
   } else {
     // Only a full system barrier exists in the M-class architectures.
     Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
     Constant *CDomain = Builder.getInt32(Domain);
-    return Builder.CreateIntrinsic(Intrinsic::arm_dmb, CDomain);
+    return Builder.CreateIntrinsicWithoutFolding(Intrinsic::arm_dmb, CDomain);
   }
 }
 
@@ -21692,7 +21691,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
 
   Type *Tys[] = { Addr->getType() };
   Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
-  CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
+  CallInst *CI = Builder.CreateIntrinsicWithoutFolding(Int, Tys, Addr);
 
   CI->addParamAttr(
       0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
@@ -21896,7 +21895,7 @@ bool ARMTargetLowering::lowerInterleavedLoad(
       BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
                                             VecTy->getNumElements() * Factor);
 
-    CallInst *VldN = createLoadIntrinsic(BaseAddr);
+    Value *VldN = createLoadIntrinsic(BaseAddr);
 
     // Replace uses of each shufflevector with the corresponding vector loaded
     // by ldN.
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index 869436a7e4964..ed95cba487e77 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -460,14 +460,13 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherBase(
     return nullptr;
   Value *Mask = I->getArgOperand(1);
   if (match(Mask, m_One()))
-    return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base,
-                                   {Ty, Ptr->getType()},
-                                   {Ptr, Builder.getInt32(Increment)});
-  else
-    return Builder.CreateIntrinsic(
-        Intrinsic::arm_mve_vldr_gather_base_predicated,
-        {Ty, Ptr->getType(), Mask->getType()},
-        {Ptr, Builder.getInt32(Increment), Mask});
+    return Builder.CreateIntrinsicWithoutFolding(
+        Intrinsic::arm_mve_vldr_gather_base, {Ty, Ptr->getType()},
+        {Ptr, Builder.getInt32(Increment)});
+  return Builder.CreateIntrinsicWithoutFolding(
+      Intrinsic::arm_mve_vldr_gather_base_predicated,
+      {Ty, Ptr->getType(), Mask->getType()},
+      {Ptr, Builder.getInt32(Increment), Mask});
 }
 
 Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherBaseWB(
@@ -481,14 +480,13 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherBaseWB(
     return nullptr;
   Value *Mask = I->getArgOperand(1);
   if (match(Mask, m_One()))
-    return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base_wb,
-                                   {Ty, Ptr->getType()},
-                                   {Ptr, Builder.getInt32(Increment)});
-  else
-    return Builder.CreateIntrinsic(
-        Intrinsic::arm_mve_vldr_gather_base_wb_predicated,
-        {Ty, Ptr->getType(), Mask->getType()},
-        {Ptr, Builder.getInt32(Increment), Mask});
+    return Builder.CreateIntrinsicWithoutFolding(
+        Intrinsic::arm_mve_vldr_gather_base_wb, {Ty, Ptr->getType()},
+        {Ptr, Builder.getInt32(Increment)});
+  return Builder.CreateIntrinsicWithoutFolding(
+      Intrinsic::arm_mve_vldr_gather_base_wb_predicated,
+      {Ty, Ptr->getType(), Mask->getType()},
+      {Ptr, Builder.getInt32(Increment), Mask});
 }
 
 Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
@@ -555,13 +553,13 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
   Value *Mask = I->getArgOperand(1);
   Instruction *Load = nullptr;
   if (!match(Mask, m_One()))
-    Load = Builder.CreateIntrinsic(
+    Load = Builder.CreateIntrinsicWithoutFolding(
         Intrinsic::arm_mve_vldr_gather_offset_predicated,
         {ResultTy, BasePtr->getType(), Offsets->getType(), Mask->getType()},
         {BasePtr, Offsets, Builder.getInt32(MemoryTy->getScalarSizeInBits()),
          Builder.getInt32(Scale), Builder.getInt32(Unsigned), Mask});
   else
-    Load = Builder.CreateIntrinsic(
+    Load = Builder.CreateIntrinsicWithoutFolding(
         Intrinsic::arm_mve_vldr_gather_offset,
         {ResultTy, BasePtr->getType(), Offsets->getType()},
         {BasePtr, Offsets, Builder.getInt32(MemoryTy->getScalarSizeInBits()),
@@ -626,14 +624,14 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedScatterBase(
   //  int_arm_mve_vstr_scatter_base(_predicated) addr, offset, data(, mask)
   LLVM_DEBUG(dbgs() << "masked scatters: storing to a vector of pointers\n");
   if (match(Mask, m_One()))
-    return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base,
-                                   {Ptr->getType(), Input->getType()},
-                                   {Ptr, Builder.getInt32(Increment), Input});
-  else
-    return Builder.CreateIntrinsic(
-        Intrinsic::arm_mve_vstr_scatter_base_predicated,
-        {Ptr->getType(), Input->getType(), Mask->getType()},
-        {Ptr, Builder.getInt32(Increment), Input, Mask});
+    return Builder.CreateIntrinsicWithoutFolding(
+        Intrinsic::arm_mve_vstr_scatter_base,
+        {Ptr->getType(), Input->getType()},
+        {Ptr, Builder.getInt32(Increment), Input});
+  return Builder.CreateIntrinsicWithoutFolding(
+      Intrinsic::arm_mve_vstr_scatter_base_predicated,
+      {Ptr->getType(), Input->getType(), Mask->getType()},
+      {Ptr, Builder.getInt32(Increment), Input, Mask});
 }
 
 Instruction *MVEGatherScatterLowering::tryCreateMaskedScatterBaseWB(
@@ -648,14 +646,14 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedScatterBaseWB(
     return nullptr;
   Value *Mask = I->getArgOperand(2);
   if (match(Mask, m_One()))
-    return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base_wb,
-                                   {Ptr->getType(), Input->getType()},
-                                   {Ptr, Builder.getInt32(Increment), Input});
-  else
-    return Builder.CreateIntrinsic(
-        Intrinsic::arm_mve_vstr_scatter_base_wb_predicated,
-        {Ptr->getType(), Input->getType(), Mask->getType()},
-        {Ptr, Builder.getInt32(Increment), Input, Mask});
+    return Builder.CreateIntrinsicWithoutFolding(
+        Intrinsic::arm_mve_vstr_scatter_base_wb,
+        {Ptr->getType(), Input->getType()},
+        {Ptr, Builder.getInt32(Increment), Input});
+  return Builder.CreateIntrinsicWithoutFolding(
+      Intrinsic::arm_mve_vstr_scatter_base_wb_predicated,
+      {Ptr->getType(), Input->getType(), Mask->getType()},
+      {Ptr, Builder.getInt32(Increment), Input, Mask});
 }
 
 Instruction *MVEGatherScatterLowering::tryCreateMaskedScatterOffset(
@@ -707,20 +705,19 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedScatterOffset(
   if (ExtendInput)
     Input = Builder.CreateZExt(Input, InputTy);
   if (!match(Mask, m_One()))
-    return Builder.CreateIntrinsic(
+    return Builder.CreateIntrinsicWithoutFolding(
         Intrinsic::arm_mve_vstr_scatter_offset_predicated,
         {BasePtr->getType(), Offsets->getType(), Input->getType(),
          Mask->getType()},
         {BasePtr, Offsets, Input,
          Builder.getInt32(MemoryTy->getScalarSizeInBits()),
          Builder.getInt32(Scale), Mask});
-  else
-    return Builder.CreateIntrinsic(
-        Intrinsic::arm_mve_vstr_scatter_offset,
-        {BasePtr->getType(), Offsets->getType(), Input->getType()},
-        {BasePtr, Offsets, Input,
-         Builder.getInt32(MemoryTy->getScalarSizeInBits()),
-         Builder.getInt32(Scale)});
+  return Builder.CreateIntrinsicWithoutFolding(
+      Intrinsic::arm_mve_vstr_scatter_offset,
+      {BasePtr->getType(), Offsets->getType(), Input->getType()},
+      {BasePtr, Offsets, Input,
+       Builder.getInt32(MemoryTy->getScalarSizeInBits()),
+       Builder.getInt32(Scale)});
 }
 
 Instruction *MVEGatherScatterLowering::tryCreateIncrementingGatScat(
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 579d66f3ad070..06906d3b6d4e3 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -443,8 +443,8 @@ static Value *expandExpIntrinsic(CallInst *Orig) {
                              ConstantFP::get(EltTy, numbers::log2ef))
                        : ConstantFP::get(EltTy, numbers::log2ef);
   Value *NewX = Builder.CreateFMul(Log2eConst, X);
-  auto *Exp2Call =
-      Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {NewX}, nullptr, "dx.exp2");
+  auto *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
+      Ty, Intrinsic::exp2, {NewX}, nullptr, "dx.exp2");
   Exp2Call->setTailCall(Orig->isTailCall());
   Exp2Call->setAttributes(Orig->getAttributes());
   return Exp2Call;
@@ -569,8 +569,8 @@ static Value *expandLogIntrinsic(CallInst *Orig,
                                  cast<FixedVectorType>(Ty)->getNumElements()),
                              ConstantFP::get(EltTy, LogConstVal))
                        : ConstantFP::get(EltTy, LogConstVal);
-  auto *Log2Call =
-      Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
+  auto *Log2Call = Builder.CreateIntrinsicWithoutFolding(
+      Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
   Log2Call->setTailCall(Orig->isTailCall());
   Log2Call->setAttributes(Orig->getAttributes());
   return Builder.CreateFMul(Ln2Const, Log2Call);
@@ -625,8 +625,8 @@ static Value *expandAtan2Intrinsic(CallInst *Orig) {
 
   Value *Tan = Builder.CreateFDiv(Y, X);
 
-  CallInst *Atan =
-      Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan");
+  CallInst *Atan = Builder.CreateIntrinsicWithoutFolding(
+      Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan");
   Atan->setTailCall(Orig->isTailCall());
   Atan->setAttributes(Orig->getAttributes());
 
@@ -734,8 +734,8 @@ static Value *expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId) {
   auto *Log2Call =
       Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
   auto *Mul = Builder.CreateFMul(Log2Call, Y);
-  auto *Exp2Call =
-      Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {Mul}, nullptr, "elt.exp2");
+  auto *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
+      Ty, Intrinsic::exp2, {Mul}, nullptr, "elt.exp2");
   Exp2Call->setTailCall(Orig->isTailCall());
   Exp2Call->setAttributes(Orig->getAttributes());
   return Exp2Call;
@@ -818,7 +818,7 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
       Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
     }
 
-    CallInst *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
+    Value *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
     Loads.push_back(Load);
 
     // extract the buffer load's result
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index efac3dd7900c1..78bd196b394c8 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -177,7 +177,7 @@ class OpLowerer {
   /// piecemeal way - we can add the casts in to avoid updating all of the uses
   /// or defs, and by the end all of the casts will be redundant.
   Value *createTmpHandleCast(Value *V, Type *Ty) {
-    CallInst *Cast = OpBuilder.getIRB().CreateIntrinsic(
+    auto *Cast = OpBuilder.getIRB().CreateIntrinsicWithoutFolding(
         Intrinsic::dx_resource_casthandle, {Ty, V->getType()}, {V});
     CleanupCasts.push_back(Cast);
     return Cast;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 80b6545861054..9e305f8039812 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -2443,7 +2443,7 @@ Value *HvxIdioms::processVGather(Instruction &In) const {
     return nullptr;
   LLVM_DEBUG(dbgs() << "  Indexes        : " << *Indexes << "\n");
 
-  Instruction *Gather = nullptr;
+  Value *Gather = nullptr;
   Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
   if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
     // We fully assume the address space is in VTCM. We also assume that all
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 42e65d7c9c847..ef317626451da 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -291,7 +291,7 @@ static void convertToParamAS(ArrayRef<Use *> OldUses, Value *Param) {
 // alignment of the return value based on the alignment of the argument.
 static CallInst *createNVVMInternalAddrspaceWrap(IRBuilder<> &IRB,
                                                  Argument &Arg) {
-  CallInst *ArgInParam = IRB.CreateIntrinsic(
+  CallInst *ArgInParam = IRB.CreateIntrinsicWithoutFolding(
       Intrinsic::nvvm_internal_addrspace_wrap,
       {IRB.getPtrTy(ADDRESS_SPACE_ENTRY_PARAM), Arg.getType()}, &Arg, {},
       Arg.getName() + ".param");
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e43021b1f5379..fe2797c8d8d1d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13062,8 +13062,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
 //  Other Lowering Code
 //===----------------------------------------------------------------------===//
 
-static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
-  return Builder.CreateIntrinsic(Id, {});
+static CallInst *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
+  return Builder.CreateIntrinsicWithoutFolding(Id, {});
 }
 
 Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
@@ -13156,8 +13156,8 @@ Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
     if (isa<LoadInst>(Inst))
-      return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
-                                     {Inst});
+      return Builder.CreateIntrinsicWithoutFolding(Intrinsic::ppc_cfence,
+                                                   {Inst->getType()}, {Inst});
     // FIXME: Can use isync for rmw operation.
     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
   }
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 7026541cbd53d..55c10533c18db 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -239,7 +239,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
   if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL))
     return false;
 
-  CallInst *SegLoad = nullptr;
+  Value *SegLoad = nullptr;
   if (MaskFactor < Factor && MaskFactor != 1) {
     // Lower to strided segmented load.
     unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 42e398196438d..7b5eb63351a8d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -605,7 +605,7 @@ CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Function *F, Value *Op,
   SmallVector<Value *, 2> Args = {Op, buildMD(getNormalizedPoisonValue(ElemTy)),
                                   B.getInt32(getPointerAddressSpace(OpTy))};
   CallInst *PtrCasted =
-      B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
+      B.CreateIntrinsicWithoutFolding(Intrinsic::spv_ptrcast, {Types}, Args);
   GR->buildAssignPtr(B, ElemTy, PtrCasted);
   return PtrCasted;
 }
@@ -892,7 +892,8 @@ SPIRVEmitIntrinsics::buildLogicalAccessChainFromGEP(GetElementPtrInst &GEP) {
   Args.push_back(B.getInt1(GEP.isInBounds()));
   Args.push_back(GEP.getOperand(0));
   llvm::append_range(Args, Indices);
-  auto *NewI = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args});
+  auto *NewI =
+      B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep, {Types}, {Args});
   replaceAllUsesWithAndErase(B, &GEP, NewI);
   return NewI;
 }
@@ -1553,8 +1554,8 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
     if (isAssignTypeInstr(U)) {
       B.SetInsertPoint(U);
       SmallVector<Value *, 2> Args = {New, U->getOperand(1)};
-      CallInst *AssignCI =
-          B.CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args);
+      CallInst *AssignCI = B.CreateIntrinsicWithoutFolding(
+          Intrinsic::spv_assign_type, {New->getType()}, Args);
       GR->addAssignPtrTypeInstr(New, AssignCI);
       U->eraseFromParent();
     } else if (isMemInstrToReplace(U) || isa<ReturnInst>(U) ||
@@ -1616,7 +1617,8 @@ void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) {
         setInsertPointSkippingPhis(B, I);
         BPrepared = true;
       }
-      auto *IntrUndef = B.CreateIntrinsic(Intrinsic::spv_undef, {});
+      auto *IntrUndef =
+          B.CreateIntrinsicWithoutFolding(Intrinsic::spv_undef, {});
       I->replaceUsesOfWith(Op, IntrUndef);
       AggrConsts[IntrUndef] = AggrUndef;
       AggrConstTypes[IntrUndef] = AggrUndef->getType();
@@ -1646,8 +1648,8 @@ void SPIRVEmitIntrinsics::preprocessPoisons(IRBuilder<> &B) {
           setInsertPointSkippingPhis(B, &I);
           BPrepared = true;
         }
-        auto *Call =
-            B.CreateIntrinsic(Intrinsic::spv_poison, {B.getInt32Ty()}, {});
+        auto *Call = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_poison,
+                                                     {B.getInt32Ty()}, {});
         AggrConsts[Call] = Poison;
         AggrConstTypes[Call] = OpTy;
         Replacement = Call;
@@ -1735,8 +1737,8 @@ void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) {
               PrepareInsert();
               Type *PoisonTy = Op->getType();
               if (PoisonTy->isAggregateType()) {
-                auto *Call = B.CreateIntrinsic(Intrinsic::spv_poison,
-                                               {B.getInt32Ty()}, {});
+                auto *Call = B.CreateIntrinsicWithoutFolding(
+                    Intrinsic::spv_poison, {B.getInt32Ty()}, {});
                 AggrConsts[Call] = cast<PoisonValue>(Op);
                 AggrConstTypes[Call] = PoisonTy;
                 Op = Call;
@@ -1747,8 +1749,8 @@ void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) {
             Args.push_back(Op);
           }
         PrepareInsert();
-        auto *CI =
-            B.CreateIntrinsic(Intrinsic::spv_const_composite, {ResTy}, {Args});
+        auto *CI = B.CreateIntrinsicWithoutFolding(
+            Intrinsic::spv_const_composite, {ResTy}, {Args});
         Worklist.push(CI);
         I->replaceUsesOfWith(Op, CI);
         KeepInst = true;
@@ -1871,8 +1873,8 @@ Instruction *SPIRVEmitIntrinsics::visitSwitchInst(SwitchInst &I) {
     BBCases.push_back(Case.getCaseSuccessor());
     Args.push_back(BlockAddress::get(F, Case.getCaseSuccessor()));
   }
-  CallInst *NewI = B.CreateIntrinsic(Intrinsic::spv_switch,
-                                     {I.getOperand(0)->getType()}, {Args});
+  CallInst *NewI = B.CreateIntrinsicWithoutFolding(
+      Intrinsic::spv_switch, {I.getOperand(0)->getType()}, {Args});
   // remove switch to avoid its unneeded and undesirable unwrap into branches
   // and conditions
   replaceAllUsesWith(&I, NewI);
@@ -1907,7 +1909,7 @@ Instruction *SPIRVEmitIntrinsics::visitIntrinsicInst(IntrinsicInst &I) {
   for (unsigned J = 0; J < SGEP->getNumIndices(); ++J)
     Args.push_back(SGEP->getIndexOperand(J));
 
-  auto *NewI = B.CreateIntrinsic(Intrinsic::spv_gep, Types, Args);
+  auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep, Types, Args);
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2003,7 +2005,8 @@ Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) {
         Args.push_back(I.getPointerOperand());
         Args.append(NewIndices.begin(), NewIndices.end());
 
-        auto *NewI = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args});
+        auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep,
+                                                     {Types}, {Args});
         replaceAllUsesWithAndErase(B, &I, NewI);
         return NewI;
       }
@@ -2014,7 +2017,8 @@ Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) {
   SmallVector<Value *, 4> Args;
   Args.push_back(B.getInt1(I.isInBounds()));
   llvm::append_range(Args, I.operands());
-  auto *NewI = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args});
+  auto *NewI =
+      B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep, {Types}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2036,7 +2040,8 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
 
   SmallVector<Type *, 2> Types = {I.getType(), Source->getType()};
   SmallVector<Value *> Args(I.op_begin(), I.op_end());
-  auto *NewI = B.CreateIntrinsic(Intrinsic::spv_bitcast, {Types}, {Args});
+  auto *NewI =
+      B.CreateIntrinsicWithoutFolding(Intrinsic::spv_bitcast, {Types}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2315,7 +2320,8 @@ Instruction *SPIRVEmitIntrinsics::visitInsertElementInst(InsertElementInst &I) {
   IRBuilder<> B(I.getParent());
   B.SetInsertPoint(&I);
   SmallVector<Value *> Args(I.op_begin(), I.op_end());
-  auto *NewI = B.CreateIntrinsic(Intrinsic::spv_insertelt, {Types}, {Args});
+  auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_insertelt,
+                                               {Types}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2332,7 +2338,8 @@ SPIRVEmitIntrinsics::visitExtractElementInst(ExtractElementInst &I) {
   SmallVector<Type *, 3> Types = {I.getType(), I.getVectorOperandType(),
                                   I.getIndexOperand()->getType()};
   SmallVector<Value *, 2> Args = {I.getVectorOperand(), I.getIndexOperand()};
-  auto *NewI = B.CreateIntrinsic(Intrinsic::spv_extractelt, {Types}, {Args});
+  auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_extractelt,
+                                               {Types}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2351,7 +2358,7 @@ Instruction *SPIRVEmitIntrinsics::visitInsertValueInst(InsertValueInst &I) {
   for (auto &Op : I.indices())
     Args.push_back(B.getInt32(Op));
   Instruction *NewI =
-      B.CreateIntrinsic(Intrinsic::spv_insertv, {Types}, {Args});
+      B.CreateIntrinsicWithoutFolding(Intrinsic::spv_insertv, {Types}, {Args});
   replaceMemInstrUses(&I, NewI, B);
   return NewI;
 }
@@ -2370,8 +2377,8 @@ Instruction *SPIRVEmitIntrinsics::visitExtractValueInst(ExtractValueInst &I) {
   SmallVector<Value *> Args(I.operands());
   for (auto &Op : I.indices())
     Args.push_back(B.getInt32(Op));
-  auto *NewI =
-      B.CreateIntrinsic(Intrinsic::spv_extractv, {I.getType()}, {Args});
+  auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_extractv,
+                                               {I.getType()}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2395,8 +2402,8 @@ Instruction *SPIRVEmitIntrinsics::visitLoadInst(LoadInst &I) {
     IntrinsicId = Intrinsic::spv_atomic_load;
     Args.push_back(B.getInt8(static_cast<uint8_t>(I.getOrdering())));
   }
-  CallInst *NewI =
-      B.CreateIntrinsic(IntrinsicId, {I.getOperand(0)->getType()}, Args);
+  CallInst *NewI = B.CreateIntrinsicWithoutFolding(
+      IntrinsicId, {I.getOperand(0)->getType()}, Args);
 
   replaceMemInstrUses(&I, NewI, B);
   return NewI;
@@ -2435,7 +2442,7 @@ Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) {
     IntrinsicId = Intrinsic::spv_atomic_store;
     Args.push_back(B.getInt8(static_cast<uint8_t>(I.getOrdering())));
   }
-  auto *NewI = B.CreateIntrinsic(
+  auto *NewI = B.CreateIntrinsicWithoutFolding(
       IntrinsicId, {I.getValueOperand()->getType(), PtrOp->getType()}, Args);
   NewI->copyMetadata(I);
   I.eraseFromParent();
@@ -2460,11 +2467,11 @@ Instruction *SPIRVEmitIntrinsics::visitAllocaInst(AllocaInst &I) {
   Type *PtrTy = I.getType();
   auto *NewI =
       ArraySize
-          ? B.CreateIntrinsic(Intrinsic::spv_alloca_array,
-                              {PtrTy, ArraySize->getType()},
-                              {ArraySize, B.getInt32(I.getAlign().value())})
-          : B.CreateIntrinsic(Intrinsic::spv_alloca, {PtrTy},
-                              {B.getInt32(I.getAlign().value())});
+          ? B.CreateIntrinsicWithoutFolding(
+                Intrinsic::spv_alloca_array, {PtrTy, ArraySize->getType()},
+                {ArraySize, B.getInt32(I.getAlign().value())})
+          : B.CreateIntrinsicWithoutFolding(Intrinsic::spv_alloca, {PtrTy},
+                                            {B.getInt32(I.getAlign().value())});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2486,8 +2493,8 @@ Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
       static_cast<uint32_t>(getMemSemantics(I.getSuccessOrdering())) | ScSem));
   Args.push_back(B.getInt32(
       static_cast<uint32_t>(getMemSemantics(I.getFailureOrdering())) | ScSem));
-  auto *NewI = B.CreateIntrinsic(Intrinsic::spv_cmpxchg,
-                                 {I.getPointerOperand()->getType()}, {Args});
+  auto *NewI = B.CreateIntrinsicWithoutFolding(
+      Intrinsic::spv_cmpxchg, {I.getPointerOperand()->getType()}, {Args});
   replaceMemInstrUses(&I, NewI, B);
   return NewI;
 }
@@ -2567,7 +2574,7 @@ shouldEmitIntrinsicsForGlobalValue(const GlobalVariableUsers &GVUsers,
 Value *SPIRVEmitIntrinsics::buildSpvUndefComposite(Type *AggrTy,
                                                    IRBuilder<> &B) {
   auto MakeLeaf = [&](Type *ElemTy) -> Instruction * {
-    auto *Leaf = B.CreateIntrinsic(Intrinsic::spv_undef, {});
+    auto *Leaf = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_undef, {});
     AggrConsts[Leaf] = PoisonValue::get(ElemTy);
     AggrConstTypes[Leaf] = ElemTy;
     return Leaf;
@@ -2586,8 +2593,8 @@ Value *SPIRVEmitIntrinsics::buildSpvUndefComposite(Type *AggrTy,
       Elems.push_back(Entry);
     }
   }
-  auto *Composite = B.CreateIntrinsic(Intrinsic::spv_const_composite,
-                                      {B.getInt32Ty()}, Elems);
+  auto *Composite = B.CreateIntrinsicWithoutFolding(
+      Intrinsic::spv_const_composite, {B.getInt32Ty()}, Elems);
   AggrConsts[Composite] = PoisonValue::get(AggrTy);
   AggrConstTypes[Composite] = AggrTy;
   return Composite;
@@ -2613,8 +2620,8 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV,
           isa<PoisonValue>(Init) &&
           STI->canUseExtension(SPIRV::Extension::SPV_KHR_poison_freeze);
       if (UsePoison) {
-        auto *Call =
-            B.CreateIntrinsic(Intrinsic::spv_poison, {B.getInt32Ty()}, {});
+        auto *Call = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_poison,
+                                                     {B.getInt32Ty()}, {});
         AggrConsts[Call] = cast<PoisonValue>(Init);
         AggrConstTypes[Call] = Init->getType();
         InitOp = Call;
@@ -2624,8 +2631,8 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV,
     }
     Type *Ty = isAggrConstForceInt32(Init) ? B.getInt32Ty() : Init->getType();
     Constant *Const = isAggrConstForceInt32(Init) ? B.getInt32(1) : Init;
-    auto *InitInst = B.CreateIntrinsic(Intrinsic::spv_init_global,
-                                       {GV.getType(), Ty}, {&GV, Const});
+    auto *InitInst = B.CreateIntrinsicWithoutFolding(
+        Intrinsic::spv_init_global, {GV.getType(), Ty}, {&GV, Const});
     InitInst->setArgOperand(1, InitOp);
   }
   if (!Init && GV.use_empty())
@@ -3083,8 +3090,8 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I,
       SmallVector<Value *, 2> Args = {
           NewOp, buildMD(getNormalizedPoisonValue(OpElemTy)),
           B.getInt32(getPointerAddressSpace(OpTy))};
-      CallInst *PtrCasted =
-          B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
+      CallInst *PtrCasted = B.CreateIntrinsicWithoutFolding(
+          Intrinsic::spv_ptrcast, {Types}, Args);
       GR->buildAssignPtr(B, OpElemTy, PtrCasted);
       NewOp = PtrCasted;
     }
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 343311fb44475..8bff367fc3c38 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -2229,7 +2229,7 @@ void SPIRVGlobalRegistry::buildAssignType(IRBuilder<> &B, Type *Ty,
         MDString::get(Ctx, Arg->getName())};
     B.CreateIntrinsic(Intrinsic::spv_value_md,
                       {MetadataAsValue::get(Ctx, MDTuple::get(Ctx, ArgMDs))});
-    AssignCI = B.CreateIntrinsic(Intrinsic::fake_use, {Arg});
+    AssignCI = cast<CallInst>(B.CreateIntrinsic(Intrinsic::fake_use, {Arg}));
   } else {
     AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type, {Arg->getType()},
                                OfType, Arg, {}, B);
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
index cb03dcae21ede..5832d88af750c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
@@ -397,8 +397,7 @@ class SPIRVLegalizePointerCastImpl {
     SmallVector<Type *, 4> Types = {Vector->getType(), Vector->getType(),
                                     Element->getType(), Int32Ty};
     SmallVector<Value *> Args = {Vector, Element, B.getInt32(Index)};
-    Instruction *NewI =
-        B.CreateIntrinsic(Intrinsic::spv_insertelt, {Types}, {Args});
+    Value *NewI = B.CreateIntrinsic(Intrinsic::spv_insertelt, {Types}, {Args});
     buildAssignType(B, Vector->getType(), NewI);
     return NewI;
   }
@@ -410,8 +409,7 @@ class SPIRVLegalizePointerCastImpl {
     Type *Int32Ty = Type::getInt32Ty(B.getContext());
     SmallVector<Type *, 3> Types = {ElementType, Vector->getType(), Int32Ty};
     SmallVector<Value *> Args = {Vector, B.getInt32(Index)};
-    Instruction *NewI =
-        B.CreateIntrinsic(Intrinsic::spv_extractelt, {Types}, {Args});
+    Value *NewI = B.CreateIntrinsic(Intrinsic::spv_extractelt, {Types}, {Args});
     buildAssignType(B, ElementType, NewI);
     return NewI;
   }
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 29fb4d1d9ec04..ee4da13ebe5b4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -160,11 +160,12 @@ static bool lowerIntrinsicToFunction(IntrinsicInst *Intrinsic,
   case Intrinsic::bswap: {
     BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", F);
     IRBuilder<> IRB(EntryBB);
-    auto *BSwap = IRB.CreateIntrinsic(Intrinsic::bswap, Intrinsic->getType(),
-                                      F->getArg(0));
-    IRB.CreateRet(BSwap);
     IntrinsicLowering IL(M->getDataLayout());
-    IL.LowerIntrinsicCall(BSwap);
+    IRB.CreateIntrinsic(Intrinsic::bswap, Intrinsic->getType(), F->getArg(0),
+                        {}, "", {}, [&](CallInst *BSwap) {
+                          IRB.CreateRet(BSwap);
+                          IL.LowerIntrinsicCall(BSwap);
+                        });
     break;
   }
   default:
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 2b48986826b6a..f5da6dc55ff38 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -997,7 +997,7 @@ CallInst *buildIntrWithMD(Intrinsic::ID IntrID, ArrayRef<Type *> Types,
   Args.push_back(Arg2);
   Args.push_back(buildMD(Arg));
   llvm::append_range(Args, Imms);
-  return B.CreateIntrinsic(IntrID, {Types}, Args);
+  return B.CreateIntrinsicWithoutFolding(IntrID, {Types}, Args);
 }
 
 // Return true if there is an opaque pointer type nested in the argument.
diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
index 1894e71792a3f..6948a8bdd9b6d 100644
--- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
@@ -364,7 +364,7 @@ bool SystemZTDCPass::runOnFunction(Function &F) {
       // Call the intrinsic, compare result with 0.
       IRBuilder<> IRB(I);
       Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
-      Instruction *TDC =
+      Value *TDC =
           IRB.CreateIntrinsic(Intrinsic::s390_tdc, V->getType(), {V, MaskVal});
       Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
       I->replaceAllUsesWith(ICmp);
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index df42d1f4388ba..1ca21b6642a6a 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -505,8 +505,8 @@ static Instruction *createTileStore(Instruction *TileDef, Value *Ptr) {
   Value *Stride = Builder.getInt64(64);
   std::array<Value *, 5> Args = {Row, Col, Ptr, Stride, TileDef};
 
-  Instruction *TileStore =
-      Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, Args);
+  Instruction *TileStore = Builder.CreateIntrinsicWithoutFolding(
+      Intrinsic::x86_tilestored64_internal, Args);
   return TileStore;
 }
 
@@ -824,11 +824,12 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi(
         // Create tilezero at the end of incoming block.
         auto *Block = OldPN->getIncomingBlock(I);
         BasicBlock::iterator Iter = Block->getTerminator()->getIterator();
-        Instruction *NewInst = Builder.CreateIntrinsic(
+        Instruction *NewInst = Builder.CreateIntrinsicWithoutFolding(
             Intrinsic::x86_tilezero_internal, {}, {Row, Col});
         NewInst->moveBefore(Iter);
-        NewInst = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
-                                          {IncValue->getType()}, {NewInst});
+        NewInst = Builder.CreateIntrinsicWithoutFolding(
+            Intrinsic::x86_cast_tile_to_vector, {IncValue->getType()},
+            {NewInst});
         NewInst->moveBefore(Iter);
         // Replace InValue with new Value.
         OldPN->setIncomingValue(I, NewInst);
diff --git a/llvm/lib/Transforms/IPO/FatLTOCleanup.cpp b/llvm/lib/Transforms/IPO/FatLTOCleanup.cpp
index f667187b76ed5..6e90896304b56 100644
--- a/llvm/lib/Transforms/IPO/FatLTOCleanup.cpp
+++ b/llvm/lib/Transforms/IPO/FatLTOCleanup.cpp
@@ -79,7 +79,7 @@ static bool cleanUpTypeCheckedLoad(Module &M, Function &CheckedLoadFn,
     Value *Offset = I->getOperand(1);
     Type *PtrTy = I->getType()->getStructElementType(0);
     ConstantInt *True = ConstantInt::getTrue(M.getContext());
-    Instruction *Load;
+    Value *Load;
     if (IsRelative) {
       Load =
           IRB.CreateIntrinsic(Intrinsic::load_relative, {Offset->getType()},
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b4686a6a63e86..0f9b300912660 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1634,7 +1634,7 @@ Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO,
       return replaceInstUsesWith(BO, II);
     }
 
-    CallInst *NewClass =
+    Value *NewClass =
         Builder.CreateIntrinsic(Intrinsic::is_fpclass, {ClassVal0->getType()},
                                 {ClassVal0, Builder.getInt32(NewClassMask)});
     return replaceInstUsesWith(BO, NewClass);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9f6044df52553..7bed2fef8f6dc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1600,7 +1600,7 @@ Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
 
   // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
   Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
-  Instruction *NewIntrinsic = Builder.CreateIntrinsic(
+  Value *NewIntrinsic = Builder.CreateIntrinsic(
       II->getType(), II->getIntrinsicID(), NewArgs, FPI);
   return Builder.CreateVectorReverse(NewIntrinsic);
 }
@@ -3035,8 +3035,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
       SmallVector<Value *, 5> NewArgs(II->args());
       NewArgs[NegatedOpArg] = OpNotNeg;
-      Instruction *NewMul =
-          Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
+      Value *NewMul = Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
       return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
     }
     break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7b6d380acffe1..aa0fc6c652126 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4945,7 +4945,7 @@ Value *InstCombinerImpl::foldMultiplicationOverflowCheck(ICmpInst &I) {
   if (MulHadOtherUses)
     Builder.SetInsertPoint(Mul);
 
-  CallInst *Call = Builder.CreateIntrinsic(
+  Value *Call = Builder.CreateIntrinsic(
       Div->getOpcode() == Instruction::UDiv ? Intrinsic::umul_with_overflow
                                             : Intrinsic::smul_with_overflow,
       X->getType(), {X, Y}, /*FMFSource=*/nullptr, "mul");
@@ -6739,7 +6739,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
     MulA = Builder.CreateZExt(A, MulType);
   if (WidthB < MulWidth)
     MulB = Builder.CreateZExt(B, MulType);
-  CallInst *Call =
+  Value *Call =
       Builder.CreateIntrinsic(Intrinsic::umul_with_overflow, MulType,
                               {MulA, MulB}, /*FMFSource=*/nullptr, "umul");
   IC.addToWorklist(MulInstr);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 17ac09c10f41c..a721d8eca7ac5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -636,7 +636,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                            Value *Y, Value *Z) {
     InstCombiner::BuilderTy &Builder = IC.Builder;
     Value *YZ = Builder.CreateNSWAdd(Y, Z);
-    Instruction *NewPow = Builder.CreateIntrinsic(
+    Value *NewPow = Builder.CreateIntrinsic(
         Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
 
     return NewPow;
@@ -654,7 +654,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                          m_Deferred(X)))) {
     Constant *One = ConstantInt::get(Y->getType(), 1);
     if (willNotOverflowSignedAdd(Y, One, I)) {
-      Instruction *NewPow = createPowiExpr(I, *this, X, Y, One);
+      Value *NewPow = createPowiExpr(I, *this, X, Y, One);
       return replaceInstUsesWith(I, NewPow);
     }
   }
@@ -668,7 +668,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
       match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
                                                              m_Value(Z)))) &&
       Y->getType() == Z->getType() && willNotOverflowSignedAdd(Y, Z, I)) {
-    Instruction *NewPow = createPowiExpr(I, *this, X, Y, Z);
+    Value *NewPow = createPowiExpr(I, *this, X, Y, Z);
     return replaceInstUsesWith(I, NewPow);
   }
 
@@ -681,7 +681,7 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
                        m_Specific(Op1), m_Value(Y))))) &&
         willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
       Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
-      Instruction *NewPow = createPowiExpr(I, *this, Op1, Y, NegOne);
+      Value *NewPow = createPowiExpr(I, *this, Op1, Y, NegOne);
       return replaceInstUsesWith(I, NewPow);
     }
 
@@ -1927,7 +1927,7 @@ Instruction *InstCombinerImpl::foldFDivConstantDivisor(BinaryOperator &I) {
       (match(I.getOperand(1), m_PosZeroFP()) ||
        (I.hasNoSignedZeros() && match(I.getOperand(1), m_AnyZeroFP())))) {
     IRBuilder<> B(&I);
-    CallInst *CopySign = B.CreateIntrinsic(
+    Value *CopySign = B.CreateIntrinsic(
         Intrinsic::copysign, {C->getType()},
         {ConstantFP::getInfinity(I.getType()), I.getOperand(0)}, &I);
     CopySign->takeName(&I);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index df9ba225657d3..8a9ae961895eb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -417,9 +417,9 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
           Value *SelectVal = Builder.CreateSelect(Cond, LdexpVal0, LdexpVal1);
           Value *SelectExp = Builder.CreateSelect(Cond, LdexpExp0, LdexpExp1);
 
-          CallInst *NewLdexp = Builder.CreateIntrinsic(
-              TII->getType(), Intrinsic::ldexp, {SelectVal, SelectExp});
-          NewLdexp->setFastMathFlags(FMF);
+          Value *NewLdexp = Builder.CreateIntrinsic(
+              TII->getType(), Intrinsic::ldexp, {SelectVal, SelectExp}, {}, "",
+              [&FMF](CallInst *CI) { CI->setFastMathFlags(FMF); });
           return replaceInstUsesWith(SI, NewLdexp);
         }
       }
diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index fb7e58f4632ef..0506a9b44589b 100644
--- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -111,7 +111,7 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
 static CallInst *InsertTrap(BuilderTy &IRB, bool DebugTrapBB,
                             std::optional<int8_t> GuardKind) {
   if (!DebugTrapBB)
-    return IRB.CreateIntrinsic(Intrinsic::trap, {});
+    return IRB.CreateIntrinsicWithoutFolding(Intrinsic::trap, {});
 
   uint64_t ImmArg = GuardKind.has_value()
                         ? GuardKind.value()
@@ -121,8 +121,8 @@ static CallInst *InsertTrap(BuilderTy &IRB, bool DebugTrapBB,
   if (ImmArg > 255)
     ImmArg = 255;
 
-  return IRB.CreateIntrinsic(Intrinsic::ubsantrap,
-                             ConstantInt::get(IRB.getInt8Ty(), ImmArg));
+  return IRB.CreateIntrinsicWithoutFolding(
+      Intrinsic::ubsantrap, ConstantInt::get(IRB.getInt8Ty(), ImmArg));
 }
 
 static CallInst *InsertCall(BuilderTy &IRB, bool MayReturn, StringRef Name) {
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index c45ec68f3cd07..97521891d07e9 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1268,7 +1268,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     MS.initializeCallbacks(*F.getParent(), TLI);
     FnPrologueEnd =
         IRBuilder<>(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIIt())
-            .CreateIntrinsic(Intrinsic::donothing, {});
+            .CreateIntrinsicWithoutFolding(Intrinsic::donothing, {});
 
     if (MS.CompileKernel) {
       IRBuilder<> IRB(FnPrologueEnd);
@@ -4642,7 +4642,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // copy them.
     ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));
 
-    CallInst *CI =
+    Value *CI =
         IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
     setShadow(&I, CI);
 
@@ -4697,7 +4697,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     ShadowArgs.append(1, SrcShadowPtr);
     ShadowArgs.append(1, Mask);
 
-    CallInst *CI =
+    Value *CI =
         IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
     // The AVX masked load intrinsics do not have integer variants. We use the
     // floating-point variants, which will happily copy the shadows even if
@@ -4747,8 +4747,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // Shadows are integer-ish types but some intrinsics require a
     // different (e.g., floating-point) type.
     Shadow = IRB.CreateBitCast(Shadow, I.getArgOperand(0)->getType());
-    CallInst *CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
-                                       {Shadow, I.getArgOperand(1)});
+    Value *CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+                                    {Shadow, I.getArgOperand(1)});
 
     setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
     setOriginForNaryOp(I);
@@ -4781,8 +4781,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // different (e.g., floating-point) type.
     AShadow = IRB.CreateBitCast(AShadow, I.getArgOperand(0)->getType());
     BShadow = IRB.CreateBitCast(BShadow, I.getArgOperand(2)->getType());
-    CallInst *CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
-                                       {AShadow, Idx, BShadow});
+    Value *CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+                                    {AShadow, Idx, BShadow});
     setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
     setOriginForNaryOp(I);
   }
@@ -5394,12 +5394,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     Value *XShadow = getShadow(X);
     Value *BZeroShadow = getCleanShadow(B);
 
-    CallInst *AShadowXShadow = IRB.CreateIntrinsic(
+    Value *AShadowXShadow = IRB.CreateIntrinsic(
         I.getType(), I.getIntrinsicID(), {XShadow, AShadow, BZeroShadow});
-    CallInst *AShadowX = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
-                                             {X, AShadow, BZeroShadow});
-    CallInst *XShadowA = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
-                                             {XShadow, A, BZeroShadow});
+    Value *AShadowX = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+                                          {X, AShadow, BZeroShadow});
+    Value *XShadowA = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+                                          {XShadow, A, BZeroShadow});
 
     unsigned NumElements = cast<FixedVectorType>(I.getType())->getNumElements();
     Value *BShadow = getShadow(B);
@@ -5477,7 +5477,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // The NEON vector load instructions handled by this function all have
     // integer variants. It is easier to use those rather than trying to cast
     // a struct of vectors of floats into a struct of vectors of integers.
-    CallInst *CI =
+    Value *CI =
         IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
     setShadow(&I, CI);
 
@@ -5560,7 +5560,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
     ShadowArgs.append(1, OutputShadowPtr);
 
-    CallInst *CI =
+    Value *CI =
         IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
     setShadow(&I, CI);
 
@@ -5757,8 +5757,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       ShadowArgs.push_back(Arg);
     }
 
-    CallInst *CI =
-        IRB.CreateIntrinsic(I.getType(), shadowIntrinsicID, ShadowArgs);
+    Value *CI = IRB.CreateIntrinsic(I.getType(), shadowIntrinsicID, ShadowArgs);
     Value *CombinedShadow = CI;
 
     // Combine the computed shadow with the shadow of trailing args
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 7a08d99a9e505..33cc9ad8f5925 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1174,7 +1174,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
              isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
     assert(isa<SCEVConstant>(StoreSizeSCEV) && "Expected constant store size");
 
-    NewCall = Builder.CreateIntrinsic(
+    NewCall = Builder.CreateIntrinsicWithoutFolding(
         Intrinsic::experimental_memset_pattern,
         {DestInt8PtrTy, PatternValue->getType(), IntIdxTy},
         {BasePtr, PatternValue, MemsetArg,
@@ -2600,27 +2600,24 @@ bool LoopIdiomRecognize::recognizePopcount() {
   return true;
 }
 
-static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
-                                       const DebugLoc &DL) {
+static Value *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+                                    const DebugLoc &DL) {
   Value *Ops[] = {Val};
   Type *Tys[] = {Val->getType()};
 
-  CallInst *CI = IRBuilder.CreateIntrinsic(Intrinsic::ctpop, Tys, Ops);
-  CI->setDebugLoc(DL);
-
-  return CI;
+  return IRBuilder.CreateIntrinsic(
+      Intrinsic::ctpop, Tys, Ops, {}, "", {},
+      [&DL](CallInst *CI) { CI->setDebugLoc(DL); });
 }
 
-static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
-                                    const DebugLoc &DL, bool ZeroCheck,
-                                    Intrinsic::ID IID) {
+static Value *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+                                 const DebugLoc &DL, bool ZeroCheck,
+                                 Intrinsic::ID IID) {
   Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
   Type *Tys[] = {Val->getType()};
 
-  CallInst *CI = IRBuilder.CreateIntrinsic(IID, Tys, Ops);
-  CI->setDebugLoc(DL);
-
-  return CI;
+  return IRBuilder.CreateIntrinsic(
+      IID, Tys, Ops, {}, "", {}, [&DL](CallInst *CI) { CI->setDebugLoc(DL); });
 }
 
 /// Transform the following loop (Using CTLZ, CTTZ is similar):
@@ -3126,7 +3123,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   Value *Mask =
       Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
   Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
-  CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
+  Value *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
       IntrID, Ty, {XMasked, /*is_zero_poison=*/Builder.getTrue()},
       /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
   Value *XMaskedNumActiveBits = Builder.CreateSub(
@@ -3486,7 +3483,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
 
   // Step 1: Compute the loop's final IV value / trip count.
 
-  CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
+  Value *ValNumLeadingZeros = Builder.CreateIntrinsic(
       IntrID, Ty, {Val, /*is_zero_poison=*/Builder.getFalse()},
       /*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
   Value *ValNumActiveBits = Builder.CreateSub(
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index b2e45c3340b9f..d09d6bc57ee11 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2278,9 +2278,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
     // Get the backedge taken count and truncate or extended to the AR type.
     Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
 
-    CallInst *Mul = Builder.CreateIntrinsic(Intrinsic::umul_with_overflow, Ty,
-                                            {AbsStep, TruncTripCount},
-                                            /*FMFSource=*/nullptr, "mul");
+    Value *Mul = Builder.CreateIntrinsic(Intrinsic::umul_with_overflow, Ty,
+                                         {AbsStep, TruncTripCount},
+                                         /*FMFSource=*/nullptr, "mul");
     Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
     Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 1cbc8d0bef5ae..623a413d246fc 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2251,7 +2251,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
       hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) {
 
     if (Pow->doesNotAccessMemory()) {
-      CallInst *NewExp10 =
+      Value *NewExp10 =
           B.CreateIntrinsic(Intrinsic::exp10, {Ty}, {Expo}, Pow, "exp10");
       return copyFlags(*Pow, NewExp10);
     }
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8a2e5a97f7573..47c4e8b20cf15 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -23687,7 +23687,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       setInsertPointAfterBundle(E);
 
       LoadInst *LI = cast<LoadInst>(VL0);
-      Instruction *NewLI;
+      Value *NewLI;
       FixedVectorType *StridedLoadTy = nullptr;
       Value *PO = LI->getPointerOperand();
       if (E->State == TreeEntry::Vectorize) {
@@ -23750,7 +23750,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
                                          static_cast<int>(
                                              DL->getTypeAllocSize(ScalarTy))));
         Align CommonAlignment = computeCommonAlignment<LoadInst>(E->Scalars);
-        auto *Inst = Builder.CreateIntrinsic(
+        auto *Inst = Builder.CreateIntrinsicWithoutFolding(
             Intrinsic::experimental_vp_strided_load,
             {StridedLoadTy, PO->getType(), StrideTy},
             {PO, StrideVal,
@@ -23809,7 +23809,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       VecValue = FinalShuffle(VecValue, E);
 
       Value *Ptr = SI->getPointerOperand();
-      Instruction *ST;
+      Value *ST;
       if (E->State == TreeEntry::Vectorize) {
         ST = Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
       } else {
@@ -23839,7 +23839,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
                 StrideTy, static_cast<int>(DL->getTypeAllocSize(ScalarTy))));
         if (StridedStoreTy != VecTy)
           VecValue = Builder.CreateBitOrPointerCast(VecValue, StridedStoreTy);
-        auto *Inst = Builder.CreateIntrinsic(
+        auto *Inst = Builder.CreateIntrinsicWithoutFolding(
             Intrinsic::experimental_vp_strided_store,
             {StridedStoreTy, Ptr->getType(), StrideTy},
             {VecValue, Ptr, StrideVal,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9aeacd28e00cf..11b791fe9358d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2314,7 +2314,7 @@ void VPHistogramRecipe::execute(VPTransformState &State) {
   else
     assert(Opcode == Instruction::Add && "only add or sub supported for now");
 
-  auto *HistogramInst = State.Builder.CreateIntrinsic(
+  Instruction *HistogramInst = State.Builder.CreateIntrinsicWithoutFolding(
       Intrinsic::experimental_vector_histogram_add, {VTy, IncAmt->getType()},
       {Address, IncAmt, Mask});
   applyMetadata(*HistogramInst);
@@ -4067,18 +4067,17 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
 
   if (CreateGather) {
-    NewLI =
-        Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
-                                nullptr, "wide.masked.gather");
+    NewLI = Builder.CreateIntrinsicWithoutFolding(DataTy, Intrinsic::vp_gather,
+                                                  {Addr, Mask, EVL}, nullptr,
+                                                  "wide.masked.gather");
   } else {
-    NewLI = Builder.CreateIntrinsic(DataTy, Intrinsic::vp_load,
-                                    {Addr, Mask, EVL}, nullptr, "vp.op.load");
+    NewLI = Builder.CreateIntrinsicWithoutFolding(
+        DataTy, Intrinsic::vp_load, {Addr, Mask, EVL}, nullptr, "vp.op.load");
   }
   NewLI->addParamAttr(
       0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
   applyMetadata(*NewLI);
-  Instruction *Res = NewLI;
-  State.set(this, Res);
+  State.set(this, NewLI);
 }
 
 InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
@@ -4156,13 +4155,13 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
 
   Value *Addr = State.get(getAddr(), !CreateScatter);
   if (CreateScatter) {
-    NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
-                                    Intrinsic::vp_scatter,
-                                    {StoredVal, Addr, Mask, EVL});
+    NewSI = Builder.CreateIntrinsicWithoutFolding(
+        Type::getVoidTy(EVL->getContext()), Intrinsic::vp_scatter,
+        {StoredVal, Addr, Mask, EVL});
   } else {
-    NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
-                                    Intrinsic::vp_store,
-                                    {StoredVal, Addr, Mask, EVL});
+    NewSI = Builder.CreateIntrinsicWithoutFolding(
+        Type::getVoidTy(EVL->getContext()), Intrinsic::vp_store,
+        {StoredVal, Addr, Mask, EVL});
   }
   NewSI->addParamAttr(
       1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
@@ -4351,7 +4350,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
       // so must use intrinsics to deinterleave.
       assert(InterleaveFactor <= 8 &&
              "Unsupported deinterleave factor for scalable vectors");
-      NewLoad = State.Builder.CreateIntrinsic(
+      NewLoad = State.Builder.CreateIntrinsicWithoutFolding(
           Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
           NewLoad->getType(), NewLoad,
           /*FMFSource=*/nullptr, "strided.vec");
@@ -4515,7 +4514,7 @@ void VPInterleaveEVLRecipe::execute(VPTransformState &State) {
 
   // Vectorize the interleaved load group.
   if (isa<LoadInst>(Instr)) {
-    CallInst *NewLoad = State.Builder.CreateIntrinsic(
+    CallInst *NewLoad = State.Builder.CreateIntrinsicWithoutFolding(
         VecTy, Intrinsic::vp_load, {ResAddr, GroupMask, InterleaveEVL}, nullptr,
         "wide.vp.load");
     NewLoad->addParamAttr(0,
@@ -4527,7 +4526,7 @@ void VPInterleaveEVLRecipe::execute(VPTransformState &State) {
 
     // Scalable vectors cannot use arbitrary shufflevectors (only splats),
     // so must use intrinsics to deinterleave.
-    NewLoad = State.Builder.CreateIntrinsic(
+    NewLoad = State.Builder.CreateIntrinsicWithoutFolding(
         Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
         NewLoad->getType(), NewLoad,
         /*FMFSource=*/nullptr, "strided.vec");
@@ -4579,9 +4578,10 @@ void VPInterleaveEVLRecipe::execute(VPTransformState &State) {
 
   // Interleave all the smaller vectors into one wider vector.
   Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
-  CallInst *NewStore =
-      State.Builder.CreateIntrinsic(Type::getVoidTy(Ctx), Intrinsic::vp_store,
-                                    {IVec, ResAddr, GroupMask, InterleaveEVL});
+  CallInst *NewStore = State.Builder.CreateIntrinsicWithoutFolding(
+      Type::getVoidTy(Ctx), Intrinsic::vp_store,
+      {IVec, ResAddr, GroupMask, InterleaveEVL});
+
   NewStore->addParamAttr(1,
                          Attribute::getWithAlignment(Ctx, Group->getAlign()));
 
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 99e45bdc8ee21..7a9526b9c2fe3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1813,7 +1813,7 @@ bool VectorCombine::foldBinopOfReductions(Instruction &I) {
   else
     VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
 
-  Instruction *Rdx = Builder.CreateIntrinsic(ReductionIID, {VTy}, {VectorBO});
+  Value *Rdx = Builder.CreateIntrinsic(ReductionIID, {VTy}, {VectorBO});
   replaceValue(I, *Rdx);
   return true;
 }
@@ -4245,14 +4245,14 @@ bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
   if (IsPartialReduction)
     ReduceInput = Builder.CreateShuffleVector(FinalVecV, ExtractMask);
 
-  CallInst *ReducedResult;
+  Value *ReducedResult;
   if (IsFloatReduction) {
     Value *Identity = ConstantExpr::getBinOpIdentity(
         *CommonBinOp, ReduceVecTy->getElementType(), /*AllowRHSConstant=*/false,
         CommonFMF.noSignedZeros());
-    ReducedResult = Builder.CreateIntrinsic(ReducedOp, {ReduceVecTy},
-                                            {Identity, ReduceInput});
-    ReducedResult->setFastMathFlags(CommonFMF);
+    ReducedResult = Builder.CreateIntrinsic(
+        ReducedOp, {ReduceVecTy}, {Identity, ReduceInput}, {}, "", {},
+        [&CommonFMF](CallInst *CI) { CI->setFastMathFlags(CommonFMF); });
   } else {
     ReducedResult =
         Builder.CreateIntrinsic(ReducedOp, {ReduceVecTy}, {ReduceInput});
@@ -5936,12 +5936,14 @@ bool VectorCombine::foldBitcastOfVPLoad(Instruction &I) {
   unsigned Factor = NewVecCnt.getKnownScalarFactor(OrigVecCnt);
   Value *NewEVL = Builder.CreateNUWMul(EVL, Builder.getInt32(Factor));
   Value *NewMask = Builder.CreateVectorSplat(NewVecCnt, Builder.getTrue());
-  CallInst *NewVP =
-      Builder.CreateIntrinsic(NewVecTy, Intrinsic::vp_load,
-                              {II->getMemoryPointerParam(), NewMask, NewEVL});
-  // Preserve the original alignment.
-  NewVP->addParamAttrs(
-      0, AttrBuilder(II->getContext()).addAlignmentAttr(OrigAlign));
+  Value *NewVP = Builder.CreateIntrinsic(
+      NewVecTy, Intrinsic::vp_load,
+      {II->getMemoryPointerParam(), NewMask, NewEVL}, {}, "",
+      [&](CallInst *CI) {
+        // Preserve the original alignment.
+        CI->addParamAttrs(
+            0, AttrBuilder(II->getContext()).addAlignmentAttr(OrigAlign));
+      });
   replaceValue(*Cast, *NewVP);
   return true;
 }
diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp
index b4f1c97f03aca..e693688aafb13 100644
--- a/llvm/unittests/IR/IRBuilderTest.cpp
+++ b/llvm/unittests/IR/IRBuilderTest.cpp
@@ -169,17 +169,19 @@ TEST_F(IRBuilderTest, IntrinsicMangling) {
   CallInst *Call;
 
   // Mangled return type, no arguments.
-  Call = Builder.CreateIntrinsic(Int64Ty, Intrinsic::coro_size, {});
+  Call =
+      Builder.CreateIntrinsicWithoutFolding(Int64Ty, Intrinsic::coro_size, {});
   EXPECT_EQ(Call->getCalledFunction()->getName(), "llvm.coro.size.i64");
 
   // Void return type, mangled argument type.
-  Call =
-      Builder.CreateIntrinsic(VoidTy, Intrinsic::set_loop_iterations, Int64Val);
+  Call = Builder.CreateIntrinsicWithoutFolding(
+      VoidTy, Intrinsic::set_loop_iterations, Int64Val);
   EXPECT_EQ(Call->getCalledFunction()->getName(),
             "llvm.set.loop.iterations.i64");
 
   // Mangled return type and argument type.
-  Call = Builder.CreateIntrinsic(Int64Ty, Intrinsic::lround, DoubleVal);
+  Call = Builder.CreateIntrinsicWithoutFolding(Int64Ty, Intrinsic::lround,
+                                               DoubleVal);
   EXPECT_EQ(Call->getCalledFunction()->getName(), "llvm.lround.i64.f64");
 }
 
@@ -199,8 +201,9 @@ TEST_F(IRBuilderTest, IntrinsicsWithScalableVectors) {
   Args.push_back(UndefValue::get(PredTy));
   Args.push_back(UndefValue::get(SrcVecTy));
 
-  Call = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fcvtzs_i32f16, Args,
-                                 nullptr, "aarch64.sve.fcvtzs.i32f16");
+  Call = Builder.CreateIntrinsicWithoutFolding(
+      Intrinsic::aarch64_sve_fcvtzs_i32f16, Args, nullptr,
+      "aarch64.sve.fcvtzs.i32f16");
   FTy = Call->getFunctionType();
   EXPECT_EQ(FTy->getReturnType(), DstVecTy);
   for (unsigned i = 0; i != Args.size(); ++i)
@@ -218,8 +221,9 @@ TEST_F(IRBuilderTest, IntrinsicsWithScalableVectors) {
   Args.push_back(UndefValue::get(PredTy));
   Args.push_back(UndefValue::get(VecTy));
 
-  Call = Builder.CreateIntrinsic(Intrinsic::masked_load, {VecTy, PtrToVecTy},
-                                 Args, nullptr, "masked.load");
+  Call = Builder.CreateIntrinsicWithoutFolding(Intrinsic::masked_load,
+                                               {VecTy, PtrToVecTy}, Args,
+                                               nullptr, "masked.load");
   FTy = Call->getFunctionType();
   EXPECT_EQ(FTy->getReturnType(), VecTy);
   for (unsigned i = 0; i != Args.size(); ++i)
@@ -360,12 +364,12 @@ TEST_F(IRBuilderTest, ConstrainedFP) {
   ASSERT_TRUE(isa<IntrinsicInst>(V));
   II = cast<IntrinsicInst>(V);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::experimental_constrained_fmul);
-  
+
   V = Builder.CreateFDiv(V, V);
   ASSERT_TRUE(isa<IntrinsicInst>(V));
   II = cast<IntrinsicInst>(V);
   EXPECT_EQ(II->getIntrinsicID(), Intrinsic::experimental_constrained_fdiv);
-  
+
   V = Builder.CreateFRem(V, V);
   ASSERT_TRUE(isa<IntrinsicInst>(V));
   II = cast<IntrinsicInst>(V);
@@ -459,18 +463,18 @@ TEST_F(IRBuilderTest, ConstrainedFP) {
   EXPECT_EQ(RoundingMode::Dynamic, CII->getRoundingMode());
 
   // Now override the defaults.
-  Call = Builder.CreateConstrainedFPBinOp(
-        Intrinsic::experimental_constrained_fadd, V, V, nullptr, "", nullptr,
-        RoundingMode::TowardNegative, fp::ebMayTrap);
+  Call = cast<CallInst>(Builder.CreateConstrainedFPBinOp(
+      Intrinsic::experimental_constrained_fadd, V, V, nullptr, "", nullptr,
+      RoundingMode::TowardNegative, fp::ebMayTrap));
   CII = cast<ConstrainedFPIntrinsic>(Call);
   EXPECT_EQ(CII->getIntrinsicID(), Intrinsic::experimental_constrained_fadd);
   EXPECT_EQ(fp::ebMayTrap, CII->getExceptionBehavior());
   EXPECT_EQ(RoundingMode::TowardNegative, CII->getRoundingMode());
 
   // Same as previous test for CreateConstrainedFPIntrinsic
-  Call = Builder.CreateConstrainedFPIntrinsic(
+  Call = cast<CallInst>(Builder.CreateConstrainedFPIntrinsic(
       Intrinsic::experimental_constrained_fadd, {V->getType()}, {V, V}, nullptr,
-      "", nullptr, RoundingMode::TowardNegative, fp::ebMayTrap);
+      "", nullptr, RoundingMode::TowardNegative, fp::ebMayTrap));
   CII = cast<ConstrainedFPIntrinsic>(Call);
   EXPECT_EQ(CII->getIntrinsicID(), Intrinsic::experimental_constrained_fadd);
   EXPECT_EQ(fp::ebMayTrap, CII->getExceptionBehavior());
@@ -696,7 +700,7 @@ TEST_F(IRBuilderTest, FastMathFlags) {
   ASSERT_TRUE(isa<Instruction>(F));
   FDiv = cast<Instruction>(F);
   EXPECT_FALSE(FDiv->hasAllowReciprocal());
- 
+
   // Try individual flags.
   FMF.clear();
   FMF.setAllowReciprocal();
@@ -755,7 +759,7 @@ TEST_F(IRBuilderTest, FastMathFlags) {
   EXPECT_TRUE(FAdd->hasApproxFunc());
   EXPECT_TRUE(FAdd->hasAllowContract());
   EXPECT_FALSE(FAdd->hasAllowReassoc());
-  
+
   FMF.setAllowReassoc();
   Builder.clearFastMathFlags();
   Builder.setFastMathFlags(FMF);
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index 87d922d22eaac..d502bf591b7fa 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -327,7 +327,8 @@ TEST_F(IntrinsicsTest, IRBuilderCreateIntrinsicScalar) {
   Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), 10));
   Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), 20));
 
-  CallInst *CI = Builder.CreateIntrinsic(RetTy, Intrinsic::umax, Args);
+  CallInst *CI =
+      Builder.CreateIntrinsicWithoutFolding(RetTy, Intrinsic::umax, Args);
 
   ASSERT_NE(CI, nullptr);
   EXPECT_EQ(CI->getIntrinsicID(), Intrinsic::umax);
@@ -345,7 +346,8 @@ TEST_F(IntrinsicsTest, IRBuilderCreateIntrinsicVector) {
   Args.push_back(Constant::getNullValue(RetTy));
   Args.push_back(Constant::getNullValue(RetTy));
 
-  CallInst *CI = Builder.CreateIntrinsic(RetTy, Intrinsic::umax, Args);
+  CallInst *CI =
+      Builder.CreateIntrinsicWithoutFolding(RetTy, Intrinsic::umax, Args);
 
   ASSERT_NE(CI, nullptr);
   EXPECT_EQ(CI->getIntrinsicID(), Intrinsic::umax);
@@ -366,7 +368,8 @@ TEST_F(IntrinsicsTest, IRBuilderCreateIntrinsicAddressSpace) {
   Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), 3)); // locality
   Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), 1)); // cache type
 
-  CallInst *CI = Builder.CreateIntrinsic(RetTy, Intrinsic::prefetch, Args);
+  CallInst *CI =
+      Builder.CreateIntrinsicWithoutFolding(RetTy, Intrinsic::prefetch, Args);
 
   ASSERT_NE(CI, nullptr);
   EXPECT_EQ(CI->getIntrinsicID(), Intrinsic::prefetch);
@@ -394,7 +397,7 @@ TEST_F(IntrinsicsTest, IRBuilderCreateIntrinsicVarArg) {
   Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), 0)); // NumCallArgs
   Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), 0)); // Flags
 
-  CallInst *CI = Builder.CreateIntrinsic(
+  CallInst *CI = Builder.CreateIntrinsicWithoutFolding(
       RetTy, Intrinsic::experimental_gc_statepoint, Args);
 
   ASSERT_NE(CI, nullptr);
diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp
index 8ceeacf132da1..616617853ac68 100644
--- a/llvm/unittests/Transforms/Utils/LocalTest.cpp
+++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp
@@ -1114,7 +1114,7 @@ TEST(Local, CanReplaceOperandWithVariable) {
   // immarg.
   Type *PtrPtr = B.getPtrTy(0);
   Value *Alloca = B.CreateAlloca(PtrPtr, (unsigned)0);
-  CallInst *GCRoot = B.CreateIntrinsic(
+  auto *GCRoot = B.CreateIntrinsicWithoutFolding(
       Intrinsic::gcroot, {Alloca, Constant::getNullValue(PtrPtr)});
   EXPECT_TRUE(canReplaceOperandWithVariable(GCRoot, 0)); // Alloca
   EXPECT_FALSE(canReplaceOperandWithVariable(GCRoot, 1));
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 01d3e9c4a62ce..442e6e16e955e 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -926,13 +926,13 @@ void mlir::LLVM::detail::connectPHINodes(Region &region,
 llvm::CallInst *mlir::LLVM::detail::createIntrinsicCall(
     llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic,
     ArrayRef<llvm::Value *> args, ArrayRef<llvm::Type *> tys) {
-  return builder.CreateIntrinsic(intrinsic, tys, args);
+  return builder.CreateIntrinsicWithoutFolding(intrinsic, tys, args);
 }
 
 llvm::CallInst *mlir::LLVM::detail::createIntrinsicCall(
     llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic,
     llvm::Type *retTy, ArrayRef<llvm::Value *> args) {
-  return builder.CreateIntrinsic(retTy, intrinsic, args);
+  return builder.CreateIntrinsicWithoutFolding(retTy, intrinsic, args);
 }
 
 llvm::CallInst *mlir::LLVM::detail::createIntrinsicCall(

>From 28771c07b690d77c277120a172ac7e98b9ab5e38 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Thu, 11 Jun 2026 12:34:44 +0100
Subject: [PATCH 2/3] [IRBuilder] Address review, improve some auto types

---
 .../AMDGPU/AMDGPULowerBufferFatPointers.cpp   |  6 +--
 .../Target/AMDGPU/AMDGPULowerIntrinsics.cpp   |  2 +-
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  8 +--
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    |  2 +-
 llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 49 ++++++++++---------
 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp |  2 +-
 .../Target/SPIRV/SPIRVPrepareFunctions.cpp    |  9 ++--
 llvm/lib/Transforms/IPO/FatLTOCleanup.cpp     |  8 +--
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  4 +-
 .../Transforms/Vectorize/VectorCombine.cpp    | 12 ++---
 llvm/unittests/Transforms/Utils/LocalTest.cpp |  2 +-
 11 files changed, 51 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 14c069f025683..f74f6f1baa576 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1802,7 +1802,7 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
     }
   }
 
-  auto *Call = IRB.CreateIntrinsicWithoutFolding(IID, Ty, Args);
+  CallInst *Call = IRB.CreateIntrinsicWithoutFolding(IID, Ty, Args);
   copyMetadata(Call, I);
   setAlign(Call, Alignment, Arg ? 1 : 0);
   Call->takeName(I);
@@ -1869,7 +1869,7 @@ PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {
     Aux |= AMDGPU::CPol::SLC;
   if (AI.isVolatile())
     Aux |= AMDGPU::CPol::VOLATILE;
-  auto *Call = IRB.CreateIntrinsicWithoutFolding(
+  CallInst *Call = IRB.CreateIntrinsicWithoutFolding(
       Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
       {AI.getNewValOperand(), AI.getCompareOperand(), Rsrc, Off,
        IRB.getInt32(0), IRB.getInt32(Aux)});
@@ -2324,7 +2324,7 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
         IID == Intrinsic::amdgcn_load_to_lds
             ? Intrinsic::amdgcn_raw_ptr_buffer_load_lds
             : Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds;
-    Value *NewLoad = IRB.CreateIntrinsic(
+    Instruction *NewLoad = IRB.CreateIntrinsicWithoutFolding(
         NewIntr, {}, {Rsrc, LDSPtr, LoadSize, Off, SOffset, ImmOff, Aux});
     copyMetadata(NewLoad, &I);
     SplitUsers.insert(&I);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 2b80cd6096597..f3fbee86168a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -111,7 +111,7 @@ bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &I) {
     } else {
       Value *BarrierID_32 = B.getInt32(AMDGPU::Barrier::WORKGROUP);
       Value *BarrierID_16 = B.getInt16(AMDGPU::Barrier::WORKGROUP);
-      auto *IsFirst = B.CreateIntrinsicWithoutFolding(
+      CallInst *IsFirst = B.CreateIntrinsicWithoutFolding(
           B.getInt1Ty(), Intrinsic::amdgcn_s_barrier_signal_isfirst,
           {BarrierID_32});
       IsFirst->copyMetadata(I);
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 06906d3b6d4e3..88eda6656d89b 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -443,7 +443,7 @@ static Value *expandExpIntrinsic(CallInst *Orig) {
                              ConstantFP::get(EltTy, numbers::log2ef))
                        : ConstantFP::get(EltTy, numbers::log2ef);
   Value *NewX = Builder.CreateFMul(Log2eConst, X);
-  auto *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
+  CallInst *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
       Ty, Intrinsic::exp2, {NewX}, nullptr, "dx.exp2");
   Exp2Call->setTailCall(Orig->isTailCall());
   Exp2Call->setAttributes(Orig->getAttributes());
@@ -569,7 +569,7 @@ static Value *expandLogIntrinsic(CallInst *Orig,
                                  cast<FixedVectorType>(Ty)->getNumElements()),
                              ConstantFP::get(EltTy, LogConstVal))
                        : ConstantFP::get(EltTy, LogConstVal);
-  auto *Log2Call = Builder.CreateIntrinsicWithoutFolding(
+  CallInst *Log2Call = Builder.CreateIntrinsicWithoutFolding(
       Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
   Log2Call->setTailCall(Orig->isTailCall());
   Log2Call->setAttributes(Orig->getAttributes());
@@ -731,10 +731,10 @@ static Value *expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId) {
   if (IntrinsicId == Intrinsic::powi)
     Y = Builder.CreateSIToFP(Y, Ty);
 
-  auto *Log2Call =
+  Value *Log2Call =
       Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
   auto *Mul = Builder.CreateFMul(Log2Call, Y);
-  auto *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
+  CallInst *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
       Ty, Intrinsic::exp2, {Mul}, nullptr, "elt.exp2");
   Exp2Call->setTailCall(Orig->isTailCall());
   Exp2Call->setAttributes(Orig->getAttributes());
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 78bd196b394c8..ede9f36e9bdea 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -177,7 +177,7 @@ class OpLowerer {
   /// piecemeal way - we can add the casts in to avoid updating all of the uses
   /// or defs, and by the end all of the casts will be redundant.
   Value *createTmpHandleCast(Value *V, Type *Ty) {
-    auto *Cast = OpBuilder.getIRB().CreateIntrinsicWithoutFolding(
+    CallInst *Cast = OpBuilder.getIRB().CreateIntrinsicWithoutFolding(
         Intrinsic::dx_resource_casthandle, {Ty, V->getType()}, {V});
     CleanupCasts.push_back(Cast);
     return Cast;
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 7b5eb63351a8d..e49d0240b7487 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -892,7 +892,7 @@ SPIRVEmitIntrinsics::buildLogicalAccessChainFromGEP(GetElementPtrInst &GEP) {
   Args.push_back(B.getInt1(GEP.isInBounds()));
   Args.push_back(GEP.getOperand(0));
   llvm::append_range(Args, Indices);
-  auto *NewI =
+  Instruction *NewI =
       B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep, {Types}, {Args});
   replaceAllUsesWithAndErase(B, &GEP, NewI);
   return NewI;
@@ -1617,7 +1617,7 @@ void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) {
         setInsertPointSkippingPhis(B, I);
         BPrepared = true;
       }
-      auto *IntrUndef =
+      CallInst *IntrUndef =
           B.CreateIntrinsicWithoutFolding(Intrinsic::spv_undef, {});
       I->replaceUsesOfWith(Op, IntrUndef);
       AggrConsts[IntrUndef] = AggrUndef;
@@ -1648,8 +1648,8 @@ void SPIRVEmitIntrinsics::preprocessPoisons(IRBuilder<> &B) {
           setInsertPointSkippingPhis(B, &I);
           BPrepared = true;
         }
-        auto *Call = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_poison,
-                                                     {B.getInt32Ty()}, {});
+        CallInst *Call = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_poison,
+                                                         {B.getInt32Ty()}, {});
         AggrConsts[Call] = Poison;
         AggrConstTypes[Call] = OpTy;
         Replacement = Call;
@@ -1737,7 +1737,7 @@ void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) {
               PrepareInsert();
               Type *PoisonTy = Op->getType();
               if (PoisonTy->isAggregateType()) {
-                auto *Call = B.CreateIntrinsicWithoutFolding(
+                CallInst *Call = B.CreateIntrinsicWithoutFolding(
                     Intrinsic::spv_poison, {B.getInt32Ty()}, {});
                 AggrConsts[Call] = cast<PoisonValue>(Op);
                 AggrConstTypes[Call] = PoisonTy;
@@ -1909,7 +1909,8 @@ Instruction *SPIRVEmitIntrinsics::visitIntrinsicInst(IntrinsicInst &I) {
   for (unsigned J = 0; J < SGEP->getNumIndices(); ++J)
     Args.push_back(SGEP->getIndexOperand(J));
 
-  auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep, Types, Args);
+  Instruction *NewI =
+      B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep, Types, Args);
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2005,8 +2006,8 @@ Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) {
         Args.push_back(I.getPointerOperand());
         Args.append(NewIndices.begin(), NewIndices.end());
 
-        auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep,
-                                                     {Types}, {Args});
+        Instruction *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep,
+                                                            {Types}, {Args});
         replaceAllUsesWithAndErase(B, &I, NewI);
         return NewI;
       }
@@ -2017,7 +2018,7 @@ Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) {
   SmallVector<Value *, 4> Args;
   Args.push_back(B.getInt1(I.isInBounds()));
   llvm::append_range(Args, I.operands());
-  auto *NewI =
+  Instruction *NewI =
       B.CreateIntrinsicWithoutFolding(Intrinsic::spv_gep, {Types}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
@@ -2040,7 +2041,7 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
 
   SmallVector<Type *, 2> Types = {I.getType(), Source->getType()};
   SmallVector<Value *> Args(I.op_begin(), I.op_end());
-  auto *NewI =
+  Instruction *NewI =
       B.CreateIntrinsicWithoutFolding(Intrinsic::spv_bitcast, {Types}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
@@ -2320,8 +2321,8 @@ Instruction *SPIRVEmitIntrinsics::visitInsertElementInst(InsertElementInst &I) {
   IRBuilder<> B(I.getParent());
   B.SetInsertPoint(&I);
   SmallVector<Value *> Args(I.op_begin(), I.op_end());
-  auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_insertelt,
-                                               {Types}, {Args});
+  Instruction *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_insertelt,
+                                                      {Types}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2338,8 +2339,8 @@ SPIRVEmitIntrinsics::visitExtractElementInst(ExtractElementInst &I) {
   SmallVector<Type *, 3> Types = {I.getType(), I.getVectorOperandType(),
                                   I.getIndexOperand()->getType()};
   SmallVector<Value *, 2> Args = {I.getVectorOperand(), I.getIndexOperand()};
-  auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_extractelt,
-                                               {Types}, {Args});
+  Instruction *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_extractelt,
+                                                      {Types}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2377,8 +2378,8 @@ Instruction *SPIRVEmitIntrinsics::visitExtractValueInst(ExtractValueInst &I) {
   SmallVector<Value *> Args(I.operands());
   for (auto &Op : I.indices())
     Args.push_back(B.getInt32(Op));
-  auto *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_extractv,
-                                               {I.getType()}, {Args});
+  Instruction *NewI = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_extractv,
+                                                      {I.getType()}, {Args});
   replaceAllUsesWithAndErase(B, &I, NewI);
   return NewI;
 }
@@ -2442,7 +2443,7 @@ Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) {
     IntrinsicId = Intrinsic::spv_atomic_store;
     Args.push_back(B.getInt8(static_cast<uint8_t>(I.getOrdering())));
   }
-  auto *NewI = B.CreateIntrinsicWithoutFolding(
+  Instruction *NewI = B.CreateIntrinsicWithoutFolding(
       IntrinsicId, {I.getValueOperand()->getType(), PtrOp->getType()}, Args);
   NewI->copyMetadata(I);
   I.eraseFromParent();
@@ -2465,7 +2466,7 @@ Instruction *SPIRVEmitIntrinsics::visitAllocaInst(AllocaInst &I) {
   B.SetInsertPoint(&I);
   TrackConstants = false;
   Type *PtrTy = I.getType();
-  auto *NewI =
+  Instruction *NewI =
       ArraySize
           ? B.CreateIntrinsicWithoutFolding(
                 Intrinsic::spv_alloca_array, {PtrTy, ArraySize->getType()},
@@ -2493,7 +2494,7 @@ Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
       static_cast<uint32_t>(getMemSemantics(I.getSuccessOrdering())) | ScSem));
   Args.push_back(B.getInt32(
       static_cast<uint32_t>(getMemSemantics(I.getFailureOrdering())) | ScSem));
-  auto *NewI = B.CreateIntrinsicWithoutFolding(
+  Instruction *NewI = B.CreateIntrinsicWithoutFolding(
       Intrinsic::spv_cmpxchg, {I.getPointerOperand()->getType()}, {Args});
   replaceMemInstrUses(&I, NewI, B);
   return NewI;
@@ -2574,7 +2575,7 @@ shouldEmitIntrinsicsForGlobalValue(const GlobalVariableUsers &GVUsers,
 Value *SPIRVEmitIntrinsics::buildSpvUndefComposite(Type *AggrTy,
                                                    IRBuilder<> &B) {
   auto MakeLeaf = [&](Type *ElemTy) -> Instruction * {
-    auto *Leaf = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_undef, {});
+    CallInst *Leaf = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_undef, {});
     AggrConsts[Leaf] = PoisonValue::get(ElemTy);
     AggrConstTypes[Leaf] = ElemTy;
     return Leaf;
@@ -2593,7 +2594,7 @@ Value *SPIRVEmitIntrinsics::buildSpvUndefComposite(Type *AggrTy,
       Elems.push_back(Entry);
     }
   }
-  auto *Composite = B.CreateIntrinsicWithoutFolding(
+  CallInst *Composite = B.CreateIntrinsicWithoutFolding(
       Intrinsic::spv_const_composite, {B.getInt32Ty()}, Elems);
   AggrConsts[Composite] = PoisonValue::get(AggrTy);
   AggrConstTypes[Composite] = AggrTy;
@@ -2620,8 +2621,8 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV,
           isa<PoisonValue>(Init) &&
           STI->canUseExtension(SPIRV::Extension::SPV_KHR_poison_freeze);
       if (UsePoison) {
-        auto *Call = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_poison,
-                                                     {B.getInt32Ty()}, {});
+        CallInst *Call = B.CreateIntrinsicWithoutFolding(Intrinsic::spv_poison,
+                                                         {B.getInt32Ty()}, {});
         AggrConsts[Call] = cast<PoisonValue>(Init);
         AggrConstTypes[Call] = Init->getType();
         InitOp = Call;
@@ -2631,7 +2632,7 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV,
     }
     Type *Ty = isAggrConstForceInt32(Init) ? B.getInt32Ty() : Init->getType();
     Constant *Const = isAggrConstForceInt32(Init) ? B.getInt32(1) : Init;
-    auto *InitInst = B.CreateIntrinsicWithoutFolding(
+    CallInst *InitInst = B.CreateIntrinsicWithoutFolding(
         Intrinsic::spv_init_global, {GV.getType(), Ty}, {&GV, Const});
     InitInst->setArgOperand(1, InitOp);
   }
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 8bff367fc3c38..75fc4bad268e7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -2229,7 +2229,7 @@ void SPIRVGlobalRegistry::buildAssignType(IRBuilder<> &B, Type *Ty,
         MDString::get(Ctx, Arg->getName())};
     B.CreateIntrinsic(Intrinsic::spv_value_md,
                       {MetadataAsValue::get(Ctx, MDTuple::get(Ctx, ArgMDs))});
-    AssignCI = cast<CallInst>(B.CreateIntrinsic(Intrinsic::fake_use, {Arg}));
+    AssignCI = B.CreateIntrinsicWithoutFolding(Intrinsic::fake_use, {Arg});
   } else {
     AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type, {Arg->getType()},
                                OfType, Arg, {}, B);
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index ee4da13ebe5b4..a26eb4b7fdae3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -161,11 +161,10 @@ static bool lowerIntrinsicToFunction(IntrinsicInst *Intrinsic,
     BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", F);
     IRBuilder<> IRB(EntryBB);
     IntrinsicLowering IL(M->getDataLayout());
-    IRB.CreateIntrinsic(Intrinsic::bswap, Intrinsic->getType(), F->getArg(0),
-                        {}, "", {}, [&](CallInst *BSwap) {
-                          IRB.CreateRet(BSwap);
-                          IL.LowerIntrinsicCall(BSwap);
-                        });
+    Value *BSwap = IRB.CreateIntrinsic(
+        Intrinsic::bswap, Intrinsic->getType(), F->getArg(0), {}, "", {},
+        [&](CallInst *BSwap) { IL.LowerIntrinsicCall(BSwap); });
+    IRB.CreateRet(BSwap);
     break;
   }
   default:
diff --git a/llvm/lib/Transforms/IPO/FatLTOCleanup.cpp b/llvm/lib/Transforms/IPO/FatLTOCleanup.cpp
index 6e90896304b56..eb942d4dd160a 100644
--- a/llvm/lib/Transforms/IPO/FatLTOCleanup.cpp
+++ b/llvm/lib/Transforms/IPO/FatLTOCleanup.cpp
@@ -79,11 +79,11 @@ static bool cleanUpTypeCheckedLoad(Module &M, Function &CheckedLoadFn,
     Value *Offset = I->getOperand(1);
     Type *PtrTy = I->getType()->getStructElementType(0);
     ConstantInt *True = ConstantInt::getTrue(M.getContext());
-    Value *Load;
+    Instruction *Load;
     if (IsRelative) {
-      Load =
-          IRB.CreateIntrinsic(Intrinsic::load_relative, {Offset->getType()},
-                              {Ptr, Offset}, /*FMFSource=*/nullptr, "rel_load");
+      Load = IRB.CreateIntrinsicWithoutFolding(
+          Intrinsic::load_relative, {Offset->getType()}, {Ptr, Offset},
+          /*FMFSource=*/nullptr, "rel_load");
     } else {
       Value *PtrAdd = IRB.CreatePtrAdd(Ptr, Offset);
       Load = IRB.CreateLoad(PtrTy, PtrAdd, "vfunc");
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 47c4e8b20cf15..dec9567fd0f0d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -23687,7 +23687,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       setInsertPointAfterBundle(E);
 
       LoadInst *LI = cast<LoadInst>(VL0);
-      Value *NewLI;
+      Instruction *NewLI;
       FixedVectorType *StridedLoadTy = nullptr;
       Value *PO = LI->getPointerOperand();
       if (E->State == TreeEntry::Vectorize) {
@@ -23809,7 +23809,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       VecValue = FinalShuffle(VecValue, E);
 
       Value *Ptr = SI->getPointerOperand();
-      Value *ST;
+      Instruction *ST;
       if (E->State == TreeEntry::Vectorize) {
         ST = Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
       } else {
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 7a9526b9c2fe3..5c72cb8ab4641 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -5936,14 +5936,12 @@ bool VectorCombine::foldBitcastOfVPLoad(Instruction &I) {
   unsigned Factor = NewVecCnt.getKnownScalarFactor(OrigVecCnt);
   Value *NewEVL = Builder.CreateNUWMul(EVL, Builder.getInt32(Factor));
   Value *NewMask = Builder.CreateVectorSplat(NewVecCnt, Builder.getTrue());
-  Value *NewVP = Builder.CreateIntrinsic(
+  CallInst *NewVP = Builder.CreateIntrinsicWithoutFolding(
       NewVecTy, Intrinsic::vp_load,
-      {II->getMemoryPointerParam(), NewMask, NewEVL}, {}, "",
-      [&](CallInst *CI) {
-        // Preserve the original alignment.
-        CI->addParamAttrs(
-            0, AttrBuilder(II->getContext()).addAlignmentAttr(OrigAlign));
-      });
+      {II->getMemoryPointerParam(), NewMask, NewEVL});
+  // Preserve the original alignment.
+  NewVP->addParamAttrs(
+      0, AttrBuilder(II->getContext()).addAlignmentAttr(OrigAlign));
   replaceValue(*Cast, *NewVP);
   return true;
 }
diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp
index 616617853ac68..3c17687c11c8f 100644
--- a/llvm/unittests/Transforms/Utils/LocalTest.cpp
+++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp
@@ -1114,7 +1114,7 @@ TEST(Local, CanReplaceOperandWithVariable) {
   // immarg.
   Type *PtrPtr = B.getPtrTy(0);
   Value *Alloca = B.CreateAlloca(PtrPtr, (unsigned)0);
-  auto *GCRoot = B.CreateIntrinsicWithoutFolding(
+  CallInst *GCRoot = B.CreateIntrinsicWithoutFolding(
       Intrinsic::gcroot, {Alloca, Constant::getNullValue(PtrPtr)});
   EXPECT_TRUE(canReplaceOperandWithVariable(GCRoot, 0)); // Alloca
   EXPECT_FALSE(canReplaceOperandWithVariable(GCRoot, 1));

>From 6141bae5be9d87759cb9b753ac1c35d798c0d2c9 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Thu, 11 Jun 2026 13:59:39 +0100
Subject: [PATCH 3/3] [SPIRV] Create BSwap without folding to fix test

---
 llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index a26eb4b7fdae3..74b2e0105bc5b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -160,11 +160,11 @@ static bool lowerIntrinsicToFunction(IntrinsicInst *Intrinsic,
   case Intrinsic::bswap: {
     BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", F);
     IRBuilder<> IRB(EntryBB);
-    IntrinsicLowering IL(M->getDataLayout());
-    Value *BSwap = IRB.CreateIntrinsic(
-        Intrinsic::bswap, Intrinsic->getType(), F->getArg(0), {}, "", {},
-        [&](CallInst *BSwap) { IL.LowerIntrinsicCall(BSwap); });
+    CallInst *BSwap = IRB.CreateIntrinsicWithoutFolding(
+        Intrinsic::bswap, Intrinsic->getType(), F->getArg(0));
     IRB.CreateRet(BSwap);
+    IntrinsicLowering IL(M->getDataLayout());
+    IL.LowerIntrinsicCall(BSwap);
     break;
   }
   default:



More information about the cfe-commits mailing list