[llvm] e75a2df - [tests] Stabilize tests for possible change in deref semantics

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 14 13:05:49 PDT 2021


Author: Philip Reames
Date: 2021-07-14T13:05:43-07:00
New Revision: e75a2dfe209d9cc68e4d9813a38441c45d51d8bb

URL: https://github.com/llvm/llvm-project/commit/e75a2dfe209d9cc68e4d9813a38441c45d51d8bb
DIFF: https://github.com/llvm/llvm-project/commit/e75a2dfe209d9cc68e4d9813a38441c45d51d8bb.diff

LOG: [tests] Stabilize tests for possible change in deref semantics

There's a potential change to dereferenceability attribute semantics in the near future.  See the llvm-dev thread "RFC: Decomposing deref(N) into deref(N) + nofree" and D99100 for context.

This change simply adds the appropriate attributes to the tests to keep the transform logic exercised under both the current and the proposed semantics.  Note that for many of these cases, O3 would infer exactly these attributes on the test IR.

This change handles the idiomatic pattern of a dereferenceable object being passed to a call which cannot free that memory.  A couple of other tests need more one-off attention; they'll be handled in a separate change.
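
For illustration, here is a minimal sketch of the kind of transform these attributes keep exercised (a hypothetical function, not taken from the patch).  Under the proposed semantics, dereferenceable(4) on %p only holds at function entry; extending that fact to a later program point requires knowing the memory cannot be freed in between.  nofree nosync on the enclosing function provides exactly that guarantee, so speculating the conditional load (e.g. folding the branch into a select) remains legal:

  ; A sketch, not from the patch.  With nofree nosync on the function,
  ; the dereferenceable(4) entry fact extends to the whole body, so the
  ; conditional load below may still be speculated.
  define i32 @deref_example(i1 %c, i32* dereferenceable(4) align 4 %p) nofree nosync {
  entry:
    br i1 %c, label %then, label %exit

  then:                      ; %p is known dereferenceable here as well
    %v = load i32, i32* %p, align 4
    br label %exit

  exit:
    %r = phi i32 [ %v, %then ], [ 0, %entry ]
    ret i32 %r
  }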

Added: 
    

Modified: 
    llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
    llvm/test/CodeGen/X86/hoist-invariant-load.ll
    llvm/test/CodeGen/X86/licm-dominance.ll
    llvm/test/CodeGen/X86/load-partial.ll
    llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
    llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
    llvm/test/Transforms/GVN/PRE/pre-load.ll
    llvm/test/Transforms/GVN/loadpre-context.ll
    llvm/test/Transforms/InstCombine/call-guard.ll
    llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll
    llvm/test/Transforms/InstCombine/masked_intrinsics.ll
    llvm/test/Transforms/InstCombine/select.ll
    llvm/test/Transforms/InstCombine/strcmp-memcmp.ll
    llvm/test/Transforms/JumpThreading/guards.ll
    llvm/test/Transforms/LICM/hoist-deref-load.ll
    llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
    llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
    llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
    llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
    llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll
    llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll
    llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
    llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
    llvm/test/Transforms/SimplifyCFG/X86/SpeculativeExec.ll
    llvm/test/Transforms/TailCallElim/reorder_load.ll
    llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
    llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll b/llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
index 23b4614fca813..8d5dc6a878681 100644
--- a/llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
+++ b/llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
@@ -9,7 +9,7 @@
 
 declare void @use(i32)
 
-define void @f_0(i8* align 4 dereferenceable(1024) %ptr) {
+define void @f_0(i8* align 4 dereferenceable(1024) %ptr) nofree nosync {
 ; CHECK-LABEL: @f_0(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[PTR_GEP:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 32
@@ -35,7 +35,7 @@ loop:
   br label %loop
 }
 
-define void @f_1(i8* align 4 dereferenceable_or_null(1024) %ptr) {
+define void @f_1(i8* align 4 dereferenceable_or_null(1024) %ptr) nofree nosync {
 ; CHECK-LABEL: @f_1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[PTR_GEP:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 32
@@ -106,7 +106,7 @@ leave:
   ret void
 }
 
-define void @checkLaunder(i8* align 4 dereferenceable(1024) %p) {
+define void @checkLaunder(i8* align 4 dereferenceable(1024) %p) nofree nosync {
 ; CHECK-LABEL: @checkLaunder(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[L:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P:%.*]])

diff --git a/llvm/test/CodeGen/X86/hoist-invariant-load.ll b/llvm/test/CodeGen/X86/hoist-invariant-load.ll
index 63ccbb818bfdd..a8f0655c20272 100644
--- a/llvm/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/llvm/test/CodeGen/X86/hoist-invariant-load.ll
@@ -234,7 +234,7 @@ define void @test_multi_def(i64* dereferenceable(8) %x1,
 ; CHECK-NEXT:  ## %bb.3: ## %exit
 ; CHECK-NEXT:    retq
                             i64* dereferenceable(8) %x2,
-                            i128* %y, i64 %count) nounwind {
+                            i128* %y, i64 %count) nounwind nofree nosync {
 entry:
   br label %for.body
 
@@ -282,7 +282,7 @@ define void @test_div_def(i32* dereferenceable(8) %x1,
 ; CHECK-NEXT:  ## %bb.3: ## %exit
 ; CHECK-NEXT:    retq
                           i32* dereferenceable(8) %x2,
-                          i32* %y, i32 %count) nounwind {
+                          i32* %y, i32 %count) nounwind nofree nosync {
 entry:
   br label %for.body
 

diff --git a/llvm/test/CodeGen/X86/licm-dominance.ll b/llvm/test/CodeGen/X86/licm-dominance.ll
index 0bcf413cdf166..900d9df1fc150 100644
--- a/llvm/test/CodeGen/X86/licm-dominance.ll
+++ b/llvm/test/CodeGen/X86/licm-dominance.ll
@@ -39,7 +39,7 @@ define void @CMSColorWorldCreateParametricData(
   i8* dereferenceable(1) %a1,
   i8* dereferenceable(1) %a2,
   i8* dereferenceable(1) %a3,
-  i64 %count) nounwind uwtable optsize ssp readonly {
+  i64 %count) nounwind uwtable optsize ssp readonly nofree nosync {
 entry:
   br label %for.body.i
 

diff --git a/llvm/test/CodeGen/X86/load-partial.ll b/llvm/test/CodeGen/X86/load-partial.ll
index 27030aed8a47d..3c7bcd3e80d2c 100644
--- a/llvm/test/CodeGen/X86/load-partial.ll
+++ b/llvm/test/CodeGen/X86/load-partial.ll
@@ -9,7 +9,7 @@
 ; Partial Vector Loads - PR16739
 ;
 
-define <4 x float> @load_float4_float3(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
 ; SSE-LABEL: load_float4_float3:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movups (%rdi), %xmm0
@@ -31,7 +31,7 @@ define <4 x float> @load_float4_float3(<4 x float>* nocapture readonly dereferen
   ret <4 x float> %r2
 }
 
-define <4 x float> @load_float4_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
 ; SSE-LABEL: load_float4_float3_0122:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
@@ -58,7 +58,7 @@ define <4 x float> @load_float4_float3_0122(<4 x float>* nocapture readonly dere
   ret <4 x float> %r3
 }
 
-define <8 x float> @load_float8_float3(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <8 x float> @load_float8_float3(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
 ; SSE-LABEL: load_float8_float3:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movups (%rdi), %xmm0
@@ -80,7 +80,7 @@ define <8 x float> @load_float8_float3(<4 x float>* nocapture readonly dereferen
   ret <8 x float> %r2
 }
 
-define <8 x float> @load_float8_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <8 x float> @load_float8_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
 ; SSE-LABEL: load_float8_float3_0122:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
@@ -107,7 +107,7 @@ define <8 x float> @load_float8_float3_0122(<4 x float>* nocapture readonly dere
   ret <8 x float> %r3
 }
 
-define <4 x float> @load_float4_float3_as_float2_float(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_as_float2_float(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
 ; SSE-LABEL: load_float4_float3_as_float2_float:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movups (%rdi), %xmm0
@@ -129,7 +129,7 @@ define <4 x float> @load_float4_float3_as_float2_float(<4 x float>* nocapture re
   ret <4 x float> %10
 }
 
-define <4 x float> @load_float4_float3_as_float2_float_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_as_float2_float_0122(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
 ; SSE-LABEL: load_float4_float3_as_float2_float_0122:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
@@ -184,7 +184,7 @@ define <4 x float> @load_float4_float3_trunc(<4 x float>* nocapture readonly der
   ret <4 x float> %16
 }
 
-define <4 x float> @load_float4_float3_trunc_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_trunc_0122(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
 ; SSE-LABEL: load_float4_float3_trunc_0122:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
@@ -217,7 +217,7 @@ define <4 x float> @load_float4_float3_trunc_0122(<4 x float>* nocapture readonl
   ret <4 x float> %17
 }
 
-define <4 x float> @load_float4_float3_trunc_0123(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_trunc_0123(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
 ; SSE2-LABEL: load_float4_float3_trunc_0123:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movaps (%rdi), %xmm0
@@ -266,7 +266,7 @@ define <4 x float> @load_float4_float3_trunc_0123(<4 x float>* nocapture readonl
 }
 
 ; PR21780
-define <4 x double> @load_double4_0u2u(double* nocapture readonly dereferenceable(32)) {
+define <4 x double> @load_double4_0u2u(double* nocapture readonly dereferenceable(32)) nofree nosync {
 ; SSE2-LABEL: load_double4_0u2u:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
@@ -302,7 +302,7 @@ define <4 x double> @load_double4_0u2u(double* nocapture readonly dereferenceabl
 
 ; Test case identified in rL366501
 @h = dso_local local_unnamed_addr global i8 0, align 1
-define dso_local i32 @load_partial_illegal_type() {
+define dso_local i32 @load_partial_illegal_type()  {
 ; SSE2-LABEL: load_partial_illegal_type:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movzwl h(%rip), %eax

diff --git a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll b/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
index 0be463daaeb05..13b87ad2fa10b 100644
--- a/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
+++ b/llvm/test/CodeGen/X86/memcmp-mergeexpand.ll
@@ -26,7 +26,7 @@ define zeroext i1 @opeq1(
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
-  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr nofree nosync {
 entry:
   %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
   %0 = load i32, i32* %first.i, align 4

diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
index d4cbda17b51d8..644ce01078248 100644
--- a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
+++ b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
@@ -263,7 +263,7 @@ header:
   br label %header
 }
 
-define i32 @test6b(i1 %cnd, i32* dereferenceable(8) align 4 %p) {
+define i32 @test6b(i1 %cnd, i32* dereferenceable(8) align 4 %p) nofree nosync {
 ; CHECK-LABEL: @test6b(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[V1_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4

diff --git a/llvm/test/Transforms/GVN/PRE/pre-load.ll b/llvm/test/Transforms/GVN/PRE/pre-load.ll
index 161053d4fd857..8b0ea99d286cc 100644
--- a/llvm/test/Transforms/GVN/PRE/pre-load.ll
+++ b/llvm/test/Transforms/GVN/PRE/pre-load.ll
@@ -683,7 +683,7 @@ follow_2:
 ; dereferenceable can be loaded from speculatively without a risk of trapping.
 ; Since it is OK to speculate, PRE is allowed.
 
-define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {
+define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) nofree nosync {
 ; CHECK-LABEL: @test15(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
@@ -724,7 +724,7 @@ if.end:
 ; dereferenceable can be loaded from speculatively without a risk of trapping.
 ; Since it is OK to speculate, PRE is allowed.
 
-define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {
+define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) nofree nosync {
 ; CHECK-LABEL: @test16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0

diff --git a/llvm/test/Transforms/GVN/loadpre-context.ll b/llvm/test/Transforms/GVN/loadpre-context.ll
index 1a178e71cc81d..f6148c0ed8312 100644
--- a/llvm/test/Transforms/GVN/loadpre-context.ll
+++ b/llvm/test/Transforms/GVN/loadpre-context.ll
@@ -3,7 +3,7 @@
 
 ; load may be speculated, address is not null using context search.
 ; There is a critical edge.
-define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
+define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) nofree nosync {
 ; CHECK-LABEL: @loadpre_critical_edge(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
@@ -48,7 +48,7 @@ null_exit:
 }
 
 ; load may be speculated, address is not null using context search.
-define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
+define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) nofree nosync {
 ; CHECK-LABEL: @loadpre_basic(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
@@ -96,7 +96,7 @@ null_exit:
 }
 
 ; load cannot be speculated, check "address is not null" does not dominate the loop.
-define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) {
+define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) nofree nosync {
 ; CHECK-LABEL: @loadpre_maybe_null(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[NULL_CHECK:%.*]], label [[PREHEADER:%.*]]

diff --git a/llvm/test/Transforms/InstCombine/call-guard.ll b/llvm/test/Transforms/InstCombine/call-guard.ll
index c219300fdaf11..e3f678604103f 100644
--- a/llvm/test/Transforms/InstCombine/call-guard.ll
+++ b/llvm/test/Transforms/InstCombine/call-guard.ll
@@ -77,7 +77,7 @@ define void @negative_load(i32 %V1, i32* %P) {
   ret void
 }
 
-define void @deref_load(i32 %V1, i32* dereferenceable(4) align 4 %P) {
+define void @deref_load(i32 %V1, i32* dereferenceable(4) align 4 %P) nofree nosync {
 ; CHECK-LABEL: @deref_load(
 ; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[V2]], [[V1:%.*]]

diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll
index 9065d53832fcb..c8023d503a149 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll
@@ -82,7 +82,7 @@ define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask
   ret <2 x double> %res
 }
 
-define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask)  {
+define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) nofree nosync {
 ; CHECK-LABEL: @load_speculative(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
@@ -96,7 +96,7 @@ define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4
   ret <2 x double> %res
 }
 
-define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask)  {
+define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
 ; CHECK-LABEL: @load_speculative_less_aligned(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
@@ -112,7 +112,7 @@ define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable
 
 ; Can't speculate since only half of required size is known deref
 
-define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask)  {
+define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
 ; CHECK-LABEL: @load_spec_neg_size(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
@@ -126,7 +126,7 @@ define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, d
 }
 
 ; Can only speculate one lane (but it's the only one active)
-define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask)  {
+define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
 ; CHECK-LABEL: @load_spec_lan0(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer

diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index 48ebab46a2899..e501dbb093982 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -82,7 +82,7 @@ define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask
   ret <2 x double> %res
 }
 
-define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask)  {
+define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) nofree nosync {
 ; CHECK-LABEL: @load_speculative(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
@@ -96,7 +96,7 @@ define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4
   ret <2 x double> %res
 }
 
-define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask)  {
+define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
 ; CHECK-LABEL: @load_speculative_less_aligned(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
@@ -112,7 +112,7 @@ define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable
 
 ; Can't speculate since only half of required size is known deref
 
-define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask)  {
+define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
 ; CHECK-LABEL: @load_spec_neg_size(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
@@ -126,7 +126,7 @@ define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, d
 }
 
 ; Can only speculate one lane (but it's the only one active)
-define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask)  {
+define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
 ; CHECK-LABEL: @load_spec_lan0(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer

diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index dcaa703371157..b49188ddd69f0 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -1072,7 +1072,7 @@ entry:
 
 ; Test that we can speculate the loads around the select even when we can't
 ; fold the load completely away.
-define i32 @test78_deref(i1 %flag, i32* dereferenceable(4) align 4 %x, i32* dereferenceable(4) align 4 %y, i32* %z) {
+define i32 @test78_deref(i1 %flag, i32* dereferenceable(4) align 4 %x, i32* dereferenceable(4) align 4 %y, i32* %z) nofree nosync {
 ; CHECK-LABEL: @test78_deref(
 ; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, i32* [[X:%.*]], align 4
 ; CHECK-NEXT:    [[Y_VAL:%.*]] = load i32, i32* [[Y:%.*]], align 4
@@ -1106,7 +1106,7 @@ define i32 @test78_neg(i1 %flag, i32* %x, i32* %y, i32* %z) {
 
 ; The same as @test78_deref but we can't speculate the load because
 ; one of the arguments is not sufficiently dereferenceable.
-define i32 @test78_deref_neg(i1 %flag, i32* dereferenceable(2) %x, i32* dereferenceable(4) %y, i32* %z) {
+define i32 @test78_deref_neg(i1 %flag, i32* dereferenceable(2) %x, i32* dereferenceable(4) %y, i32* %z) nofree nosync {
 ; CHECK-LABEL: @test78_deref_neg(
 ; CHECK-NEXT:    [[P:%.*]] = select i1 [[FLAG:%.*]], i32* [[X:%.*]], i32* [[Y:%.*]]
 ; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P]], align 4

diff --git a/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll b/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll
index 6e215aaf6462a..0b9261223a076 100644
--- a/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll
+++ b/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll
@@ -8,7 +8,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 declare void @use(i32)
 
-define i32 @strcmp_memcmp([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
@@ -25,7 +25,7 @@ define i32 @strcmp_memcmp([12 x i8]* dereferenceable (12) %buf) {
 
 declare i32 @strcmp(i8* nocapture, i8* nocapture)
 
-define i32 @strcmp_memcmp2([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp2([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp2(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -40,7 +40,7 @@ define i32 @strcmp_memcmp2([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp3([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp3([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp3(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
@@ -55,7 +55,7 @@ define i32 @strcmp_memcmp3([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp4([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp4([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp4(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -70,7 +70,7 @@ define i32 @strcmp_memcmp4([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp5([5 x i8]* dereferenceable (5) %buf) {
+define i32 @strcmp_memcmp5([5 x i8]* dereferenceable (5) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp5(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [5 x i8], [5 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
@@ -85,7 +85,7 @@ define i32 @strcmp_memcmp5([5 x i8]* dereferenceable (5) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp6([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp6([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp6(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
@@ -100,7 +100,7 @@ define i32 @strcmp_memcmp6([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp7([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp7([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp7(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -114,7 +114,7 @@ define i32 @strcmp_memcmp7([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp8([4 x i8]* dereferenceable (4) %buf) {
+define i32 @strcmp_memcmp8([4 x i8]* dereferenceable (4) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp8(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
@@ -129,7 +129,7 @@ define i32 @strcmp_memcmp8([4 x i8]* dereferenceable (4) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp9([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp9([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp9(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 4)
@@ -145,7 +145,7 @@ define i32 @strcmp_memcmp9([12 x i8]* dereferenceable (12) %buf) {
 }
 
 
-define i32 @strncmp_memcmp([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(2) [[STRING]], i8* noundef nonnull dereferenceable(2) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2)
@@ -162,7 +162,7 @@ define i32 @strncmp_memcmp([12 x i8]* dereferenceable (12) %buf) {
 
 declare i32 @strncmp(i8* nocapture, i8* nocapture, i64)
 
-define i32 @strncmp_memcmp2([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp2([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp2(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
@@ -177,7 +177,7 @@ define i32 @strncmp_memcmp2([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp3([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp3([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp3(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -192,7 +192,7 @@ define i32 @strncmp_memcmp3([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp4([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp4([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp4(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
@@ -207,7 +207,7 @@ define i32 @strncmp_memcmp4([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp5([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp5([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp5(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -223,7 +223,7 @@ define i32 @strncmp_memcmp5([12 x i8]* dereferenceable (12) %buf) {
 }
 
 
-define i32 @strncmp_memcmp6([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp6([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp6(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -238,7 +238,7 @@ define i32 @strncmp_memcmp6([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp7([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp7([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp7(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
@@ -253,7 +253,7 @@ define i32 @strncmp_memcmp7([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp8([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp8([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp8(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(3) [[STRING]], i8* noundef nonnull dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 3)
@@ -268,7 +268,7 @@ define i32 @strncmp_memcmp8([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp9([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp9([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp9(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -283,7 +283,7 @@ define i32 @strncmp_memcmp9([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp10([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp10([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp10(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -297,7 +297,7 @@ define i32 @strncmp_memcmp10([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp11([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp11([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp11(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -312,7 +312,7 @@ define i32 @strncmp_memcmp11([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp12([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp12([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp12(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
@@ -327,7 +327,7 @@ define i32 @strncmp_memcmp12([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp13([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp13([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp13(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(2) [[STRING]], i8* noundef nonnull dereferenceable(2) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 2)
@@ -342,7 +342,7 @@ define i32 @strncmp_memcmp13([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp14([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp14([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp14(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 4)
@@ -358,7 +358,7 @@ define i32 @strncmp_memcmp14([12 x i8]* dereferenceable (12) %buf) {
 }
 
 ; Negative tests
-define i32 @strcmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp_bad(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
@@ -373,7 +373,7 @@ define i32 @strcmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp_bad2(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull [[STRING]])
@@ -388,7 +388,7 @@ define i32 @strcmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp_bad3([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp_bad3([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp_bad3(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
@@ -400,7 +400,7 @@ define i32 @strcmp_memcmp_bad3([12 x i8]* dereferenceable (12) %buf) {
 }
 
 
-define i32 @strcmp_memcmp_bad4(i8* nocapture readonly %buf) {
+define i32 @strcmp_memcmp_bad4(i8* nocapture readonly %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp_bad4(
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @strcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) [[BUF:%.*]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
@@ -414,7 +414,7 @@ define i32 @strcmp_memcmp_bad4(i8* nocapture readonly %buf) {
 }
 
 
-define i32 @strcmp_memcmp_bad5([3 x i8]* dereferenceable (3) %buf) {
+define i32 @strcmp_memcmp_bad5([3 x i8]* dereferenceable (3) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp_bad5(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [3 x i8], [3 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
@@ -429,7 +429,7 @@ define i32 @strcmp_memcmp_bad5([3 x i8]* dereferenceable (3) %buf) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp_bad6([4 x i8]* dereferenceable (4) %buf, i8* nocapture readonly %k) {
+define i32 @strcmp_memcmp_bad6([4 x i8]* dereferenceable (4) %buf, i8* nocapture readonly %k) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp_bad6(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(1) [[K:%.*]])
@@ -444,7 +444,7 @@ define i32 @strcmp_memcmp_bad6([4 x i8]* dereferenceable (4) %buf, i8* nocapture
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp_bad7(i8* nocapture readonly %k) {
+define i32 @strcmp_memcmp_bad7(i8* nocapture readonly %k) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp_bad7(
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @strcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) [[K:%.*]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
@@ -457,7 +457,7 @@ define i32 @strcmp_memcmp_bad7(i8* nocapture readonly %k) {
   ret i32 %conv
 }
 
-define i32 @strcmp_memcmp_bad8([4 x i8]* dereferenceable (4) %buf) {
+define i32 @strcmp_memcmp_bad8([4 x i8]* dereferenceable (4) %buf) nofree nosync {
 ; CHECK-LABEL: @strcmp_memcmp_bad8(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
@@ -470,7 +470,7 @@ define i32 @strcmp_memcmp_bad8([4 x i8]* dereferenceable (4) %buf) {
   ret i32 0
 }
 
-define i32 @strncmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp_bad(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strncmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull [[STRING]], i64 5)
@@ -486,7 +486,7 @@ define i32 @strncmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) {
 }
 
 
-define i32 @strncmp_memcmp_bad1([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp_bad1([12 x i8]* dereferenceable (12) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp_bad1(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strncmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull [[STRING]], i64 5)
@@ -501,7 +501,7 @@ define i32 @strncmp_memcmp_bad1([12 x i8]* dereferenceable (12) %buf) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf, i64 %n) {
+define i32 @strncmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf, i64 %n) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp_bad2(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 [[N:%.*]])
@@ -516,7 +516,7 @@ define i32 @strncmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf, i64 %n) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp_bad3(i8* nocapture readonly %k) {
+define i32 @strncmp_memcmp_bad3(i8* nocapture readonly %k) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp_bad3(
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @strncmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) [[K:%.*]], i64 2)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
@@ -529,7 +529,7 @@ define i32 @strncmp_memcmp_bad3(i8* nocapture readonly %k) {
   ret i32 %conv
 }
 
-define i32 @strncmp_memcmp_bad4([4 x i8]* dereferenceable (4) %buf) {
+define i32 @strncmp_memcmp_bad4([4 x i8]* dereferenceable (4) %buf) nofree nosync {
 ; CHECK-LABEL: @strncmp_memcmp_bad4(
 ; CHECK-NEXT:    [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strncmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2)

diff --git a/llvm/test/Transforms/JumpThreading/guards.ll b/llvm/test/Transforms/JumpThreading/guards.ll
index f487efa045b0f..5868947205ee0 100644
--- a/llvm/test/Transforms/JumpThreading/guards.ll
+++ b/llvm/test/Transforms/JumpThreading/guards.ll
@@ -358,7 +358,7 @@ exit:                                             ; preds = %loop
 }
 
 ; Make sure that we can safely PRE a speculable load across a guard.
-define void @safe_pre_across_guard(i8* noalias nocapture readonly dereferenceable(8) %p, i1 %load.is.valid) {
+define void @safe_pre_across_guard(i8* noalias nocapture readonly dereferenceable(8) %p, i1 %load.is.valid) nofree nosync {
 ; CHECK-LABEL: @safe_pre_across_guard(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOADED_PR:%.*]] = load i8, i8* [[P:%.*]], align 1
@@ -413,7 +413,7 @@ exit:                                             ; preds = %loop
 }
 
 ; Make sure that we can safely PRE a speculable load across a call.
-define void @safe_pre_across_call(i8* noalias nocapture readonly dereferenceable(8) %p) {
+define void @safe_pre_across_call(i8* noalias nocapture readonly dereferenceable(8) %p) nofree nosync {
 ; CHECK-LABEL: @safe_pre_across_call(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOADED_PR:%.*]] = load i8, i8* [[P:%.*]], align 1

diff --git a/llvm/test/Transforms/LICM/hoist-deref-load.ll b/llvm/test/Transforms/LICM/hoist-deref-load.ll
index e198394bc92a9..b13be8c9fb9a5 100644
--- a/llvm/test/Transforms/LICM/hoist-deref-load.ll
+++ b/llvm/test/Transforms/LICM/hoist-deref-load.ll
@@ -1166,5 +1166,5 @@ for.end:                                          ; preds = %for.inc, %entry
   ret void
 }
 
-attributes #0 = { nounwind uwtable }
+attributes #0 = { nounwind uwtable nofree nosync }
 !0 = !{i64 4}

diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
index 813bc5173ea13..5daa3538b294a 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
@@ -7,7 +7,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 ; all bytes of %dst that are touch by the memset are dereferenceable
-define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
+define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) nofree nosync {
 ; CHECK-LABEL: @must_remove_memcpy(
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [4096 x i8], align 1
 ; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0
@@ -25,7 +25,7 @@ define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst
 
 ; memset touch more bytes than those guaranteed to be dereferenceable
 ; We can't remove the memcpy, but we can turn it into an independent memset.
-define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
+define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) nofree nosync {
 ; CHECK-LABEL: @must_not_remove_memcpy(
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [4096 x i8], align 1
 ; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0

diff --git a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
index 25a73fa04bc1d..a8d28efaa5418 100644
--- a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
@@ -19,7 +19,7 @@ define zeroext i1 @opeq1(
 ; X86-NEXT:    ret i1 [[TMP2]]
 ;
   %S* nocapture readonly dereferenceable(16) %a,
-  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
 
 entry:
   %ptr = alloca i32

diff --git a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
index 7a5a6e4091050..727d6680cc5ee 100644
--- a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
@@ -36,7 +36,7 @@ define zeroext i1 @opeq1(
 ; CHECK-NEXT:    ret i1 [[TMP13]]
 ;
   %S* nocapture readonly dereferenceable(16) %a,
-  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
 entry:
   %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3
   %0 = load i32, i32* %first.i, align 4

diff --git a/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll b/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
index c42667da1df8e..501df86133d37 100644
--- a/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll
@@ -8,7 +8,7 @@ target triple = "x86_64"
 
 ; bb1 references a gep introduced in bb0. The gep must remain available after
 ; the merge.
-define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull dereferenceable(16) %rhs) {
+define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull dereferenceable(16) %rhs) nofree nosync {
 ; CHECK-LABEL: @bug(
 ; CHECK-NEXT:  bb0:
 ; CHECK-NEXT:    store i32 1, i32* @g, align 4

diff --git a/llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll b/llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll
index ad19eec760fe4..f598d74fa4ea7 100644
--- a/llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll
@@ -7,7 +7,7 @@
 %struct.inner = type { i32, i32, i32 }
 
 ; Function Attrs: nounwind uwtable
-define i1 @test(%struct.outer* align 8 dereferenceable(16) %o1, %struct.outer* align 8 dereferenceable(116) %o2) local_unnamed_addr #0 {
+define i1 @test(%struct.outer* align 8 dereferenceable(16) %o1, %struct.outer* align 8 dereferenceable(116) %o2) local_unnamed_addr nofree nosync {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  "entry+if.then":
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_OUTER:%.*]], %struct.outer* [[O1:%.*]], i64 0, i32 0

diff --git a/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll b/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll
index daf464df9a72c..b921e4e053958 100644
--- a/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll
@@ -37,7 +37,7 @@ define zeroext i1 @opeq1(
 ; X86-NEXT:    ret i1 [[TMP7]]
 ;
   %S* nocapture readonly dereferenceable(16) %a,
-  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
 entry:
   %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
   %0 = load i32, i32* %first.i, align 4

diff --git a/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll b/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
index 66ba101cdb25e..7a2690b142f44 100644
--- a/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
@@ -37,7 +37,7 @@ define zeroext i1 @opeq1(
 ; X86-NOBUILTIN-NEXT:    ret i1 [[TMP4]]
 ;
   %S* nocapture readonly dereferenceable(8) %a,
-  %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+  %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr nofree nosync {
 entry:
   %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
   %0 = load i32, i32* %first.i, align 4
@@ -97,7 +97,7 @@ define zeroext i1 @opeq1_inverse(
 ; X86-NOBUILTIN-NEXT:    ret i1 [[TMP4]]
 ;
   %S* nocapture readonly dereferenceable(8) %a,
-  %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+  %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr nofree nosync {
 entry:
   %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
   %0 = load i32, i32* %first.i, align 4

diff --git a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
index a0efd441f55a0..cb911ebbf1255 100644
--- a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll
@@ -22,7 +22,7 @@ define zeroext i1 @opeq1(
 ;
 ; Make sure this call is moved to the beginning of the entry block.
   %S* nocapture readonly dereferenceable(16) %a,
-  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+  %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
 entry:
   %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
   %0 = load i32, i32* %first.i, align 4

diff --git a/llvm/test/Transforms/SimplifyCFG/X86/SpeculativeExec.ll b/llvm/test/Transforms/SimplifyCFG/X86/SpeculativeExec.ll
index bcac5f9831689..266927272b4ca 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/SpeculativeExec.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/SpeculativeExec.ll
@@ -119,7 +119,7 @@ end:
   ret i8* %x10
 }
 
-define i32* @test5(i32 %a, i32 %b, i32 %c, i32* dereferenceable(10) %ptr1, i32* dereferenceable(10) %ptr2, i32** dereferenceable(10) align 8 %ptr3) {
+define i32* @test5(i32 %a, i32 %b, i32 %c, i32* dereferenceable(10) %ptr1, i32* dereferenceable(10) %ptr2, i32** dereferenceable(10) align 8 %ptr3) nofree nosync {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[T1:%.*]] = icmp eq i32 [[B:%.*]], 0

diff --git a/llvm/test/Transforms/TailCallElim/reorder_load.ll b/llvm/test/Transforms/TailCallElim/reorder_load.ll
index 59d9703a80ac7..e535c9a6524ac 100644
--- a/llvm/test/Transforms/TailCallElim/reorder_load.ll
+++ b/llvm/test/Transforms/TailCallElim/reorder_load.ll
@@ -186,7 +186,7 @@ recurse:		; preds = %else
 
 ; This load can be moved above the call because the function won't write to it
 ; and the a_arg is dereferenceable.
-define fastcc i32 @raise_load_5(i32* dereferenceable(4) align 4 %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+define fastcc i32 @raise_load_5(i32* dereferenceable(4) align 4 %a_arg, i32 %a_len_arg, i32 %start_arg) readonly nofree nosync {
 ; CHECK-LABEL: @raise_load_5(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]

diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index 90bdf62b3ee10..3947aba0c8cb1 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -172,7 +172,7 @@ define double @larger_fp_scalar_256bit_vec(<8 x float>* align 32 dereferenceable
   ret double %r
 }
 
-define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -184,7 +184,7 @@ define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p
   ret <4 x float> %r
 }
 
-define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) {
+define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -198,7 +198,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenc
 
 ; Element type does not change cost.
 
-define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
+define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
@@ -212,7 +212,7 @@ define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
 
 ; Pointer type does not change cost.
 
-define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) {
+define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
@@ -227,7 +227,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceabl
 
 ; This is canonical form for vector element access.
 
-define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -241,7 +241,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenc
 
 ; Should work with addrspace as well.
 
-define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) {
+define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float> addrspace(44)* [[P:%.*]], align 16
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -255,7 +255,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(
 
 ; If there are enough dereferenceable bytes, we can offset the vector load.
 
-define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) nofree nosync {
 ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
@@ -271,7 +271,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl
 
 ; Can't safely load the offset vector, but can load+shuffle if it is profitable.
 
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) nofree nosync {
 ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
 ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
 ; SSE2-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 2
@@ -291,7 +291,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 derefere
 
 ; Verify that alignment of the new load is not over-specified.
 
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) nofree nosync {
 ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
 ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
 ; SSE2-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 8
@@ -313,7 +313,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2
 ; must be a multiple of element size.
 ; TODO: Could bitcast around this limitation.
 
-define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) {
+define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 1
 ; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[GEP]] to i32*
@@ -328,7 +328,7 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefer
   ret <4 x i32> %r
 }
 
-define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
+define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
 ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
@@ -346,7 +346,7 @@ define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe
 ; must be a multiple of element size and the offset must be low enough to fit in the vector
 ; (bitcasting would not help this case).
 
-define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
+define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
 ; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13
 ; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[GEP]] to i32*
@@ -363,7 +363,7 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe
 
 ; If there are enough dereferenceable bytes, we can offset the vector load.
 
-define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) {
+define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
@@ -439,7 +439,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferen
 
 ; Negative test - can't safely load the offset vector, but could load+shuffle.
 
-define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) {
+define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_deref(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
@@ -454,7 +454,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 derefere
 
 ; Negative test - do not alter volatile.
 
-define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_volatile(
 ; CHECK-NEXT:    [[S:%.*]] = load volatile float, float* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> poison, float [[S]], i32 0
@@ -468,7 +468,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceab
 ; Pointer is not as aligned as load, but that's ok.
 ; The new load uses the larger alignment value.
 
-define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_align(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
@@ -482,7 +482,7 @@ define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(1
 
 ; Negative test - not enough bytes.
 
-define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) {
+define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_deref(
 ; CHECK-NEXT:    [[S:%.*]] = load float, float* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> poison, float [[S]], i32 0
@@ -493,7 +493,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(1
   ret <4 x float> %r
 }
 
-define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) {
+define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v8i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
@@ -505,7 +505,7 @@ define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) {
   ret <8 x i32> %r
 }
 
-define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) {
+define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v8i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -517,7 +517,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceabl
   ret <8 x i32> %r
 }
 
-define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) {
+define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v16f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -529,7 +529,7 @@ define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16)
   ret <16 x float> %r
 }
 
-define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) {
+define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v2f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -555,7 +555,7 @@ define <2 x float> @load_f32_insert_v2f32_asan(float* align 16 dereferenceable(1
 }
 
 declare float* @getscaleptr()
-define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr, <2 x float>* nocapture nonnull readonly %opptr) {
+define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr, <2 x float>* nocapture nonnull readonly %opptr) nofree nosync {
 ; CHECK-LABEL: @PR47558_multiple_use_load(
 ; CHECK-NEXT:    [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) float* @getscaleptr()
 ; CHECK-NEXT:    [[OP:%.*]] = load <2 x float>, <2 x float>* [[OPPTR:%.*]], align 4
@@ -584,7 +584,7 @@ define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr
   ret void
 }
 
-define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -597,7 +597,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 derefe
   ret <4 x float> %r
 }
 
-define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -610,7 +610,7 @@ define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 derefe
   ret <4 x float> %r
 }
 
-define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) {
+define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) nofree nosync {
 ; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
 ; CHECK-NEXT:    [[L:%.*]] = load <1 x i32>, <1 x i32>* [[P:%.*]], align 4
 ; CHECK-NEXT:    store <1 x i32> [[L]], <1 x i32>* [[STORE_PTR:%.*]], align 4
@@ -627,7 +627,7 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16
 
 ; Can't safely load the offset vector, but can load+shuffle if it is profitable.
 
-define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) {
+define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) nofree nosync {
 ; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
 ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1
 ; SSE2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[GEP]], i32 0, i32 0

diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index c2fa94c17cf47..d280f12ae9dfa 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -172,7 +172,7 @@ define double @larger_fp_scalar_256bit_vec(<8 x float>* align 32 dereferenceable
   ret double %r
 }
 
-define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -184,7 +184,7 @@ define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p
   ret <4 x float> %r
 }
 
-define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) {
+define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -198,7 +198,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenc
 
 ; Element type does not change cost.
 
-define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
+define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
@@ -212,7 +212,7 @@ define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
 
 ; Pointer type does not change cost.
 
-define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) {
+define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
@@ -227,7 +227,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceabl
 
 ; This is canonical form for vector element access.
 
-define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -241,7 +241,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenc
 
 ; Should work with addrspace as well.
 
-define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) {
+define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float> addrspace(44)* [[P:%.*]], align 16
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -255,7 +255,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(
 
 ; If there are enough dereferenceable bytes, we can offset the vector load.
 
-define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) nofree nosync {
 ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
@@ -271,7 +271,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl
 
 ; Can't safely load the offset vector, but can load+shuffle if it is profitable.
 
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) nofree nosync {
 ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
 ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
 ; SSE2-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 2
@@ -291,7 +291,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 derefere
 
 ; Verify that alignment of the new load is not over-specified.
 
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) nofree nosync {
 ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
 ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
 ; SSE2-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 8
@@ -328,7 +328,7 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefer
   ret <4 x i32> %r
 }
 
-define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
+define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
 ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
@@ -346,7 +346,7 @@ define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe
 ; must be a multiple of element size and the offset must be low enough to fit in the vector
 ; (bitcasting would not help this case).
 
-define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
+define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
 ; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13
 ; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[GEP]] to i32*
@@ -363,7 +363,7 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe
 
 ; If there are enough dereferenceable bytes, we can offset the vector load.
 
-define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) {
+define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
@@ -380,7 +380,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl
 ; Negative test - disable under asan because widened load can cause spurious
 ; use-after-poison issues when __asan_poison_memory_region is used.
 
-define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address {
+define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_asan(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
@@ -395,7 +395,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferen
 
 ; hwasan and memtag should be similarly suppressed.
 
-define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress {
+define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_hwasan(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
@@ -408,7 +408,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 derefer
   ret <8 x i16> %r
 }
 
-define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag {
+define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_memtag(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
@@ -424,7 +424,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 derefer
 ; Negative test - disable under tsan because widened load may overlap bytes
 ; being concurrently modified. tsan does not know that some bytes are undef.
 
-define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread {
+define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
@@ -439,7 +439,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferen
 
 ; Negative test - can't safely load the offset vector, but could load+shuffle.
 
-define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) {
+define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) nofree nosync {
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16_deref(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 16
@@ -454,7 +454,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 derefere
 
 ; Negative test - do not alter volatile.
 
-define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_volatile(
 ; CHECK-NEXT:    [[S:%.*]] = load volatile float, float* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
@@ -468,7 +468,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceab
 ; Pointer is not as aligned as load, but that's ok.
 ; The new load uses the larger alignment value.
 
-define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_align(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
@@ -482,7 +482,7 @@ define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(1
 
 ; Negative test - not enough bytes.
 
-define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) {
+define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v4f32_deref(
 ; CHECK-NEXT:    [[S:%.*]] = load float, float* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
@@ -493,7 +493,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(1
   ret <4 x float> %r
 }
 
-define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) {
+define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_i32_insert_v8i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
@@ -505,7 +505,7 @@ define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) {
   ret <8 x i32> %r
 }
 
-define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) {
+define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @casted_load_i32_insert_v8i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -517,7 +517,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceabl
   ret <8 x i32> %r
 }
 
-define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) {
+define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v16f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -529,7 +529,7 @@ define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16)
   ret <16 x float> %r
 }
 
-define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) {
+define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_f32_insert_v2f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -584,7 +584,7 @@ define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr
   ret void
 }
 
-define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -597,7 +597,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 derefe
   ret <4 x float> %r
 }
 
-define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) nofree nosync {
 ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
@@ -610,7 +610,7 @@ define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 derefe
   ret <4 x float> %r
 }
 
-define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) {
+define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) nofree nosync {
 ; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
 ; CHECK-NEXT:    [[L:%.*]] = load <1 x i32>, <1 x i32>* [[P:%.*]], align 4
 ; CHECK-NEXT:    store <1 x i32> [[L]], <1 x i32>* [[STORE_PTR:%.*]], align 4
@@ -627,7 +627,7 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16
 
 ; Can't safely load the offset vector, but can load+shuffle if it is profitable.
 
-define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) {
+define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) nofree nosync {
 ; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
 ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1
 ; SSE2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[GEP]], i32 0, i32 0

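Similarly, a minimal sketch of the VectorCombine pattern that dominates both files above (again with a hypothetical function name, not code from this commit): widening a scalar load into a 16-byte vector load is only sound while all 16 bytes stay allocated, which dereferenceable(16) establishes at entry and nofree nosync preserve for the remainder of the function.

; Hypothetical reduction, not from this commit: the 4-byte scalar load may be
; rewritten as a 16-byte vector load because dereferenceable(16) plus
; nofree nosync guarantee the full region remains valid at the load.
define <4 x float> @sketch_widen_load(float* align 16 dereferenceable(16) %p) nofree nosync {
  %s = load float, float* %p, align 4
  %r = insertelement <4 x float> poison, float %s, i32 0
  ret <4 x float> %r
}
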
More information about the llvm-commits mailing list