[llvm] 20ef8b0 - [AMDGPU] Add `nocreateundeforpoison` annotations (#166450)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 2 10:13:03 PST 2026
Author: Krzysztof Drewniak
Date: 2026-01-02T10:12:58-08:00
New Revision: 20ef8b02855ab3bf71d4be1b2423834e507dfe3b
URL: https://github.com/llvm/llvm-project/commit/20ef8b02855ab3bf71d4be1b2423834e507dfe3b
DIFF: https://github.com/llvm/llvm-project/commit/20ef8b02855ab3bf71d4be1b2423834e507dfe3b.diff
LOG: [AMDGPU] Add `nocreateundeforpoison` annotations (#166450)
This commit goes through IntrinsicsAMDGPU.td and adds
`nocreateundeforpoison` to intrinsics that (to my knowledge) perform
arithmetic operations that are defined everywhere (so no bitfield
extracts and such since those can have invalid inputs, and similarly for permutations).
Added:
Modified:
clang/test/CodeGenOpenCL/builtins-amdgcn.cl
clang/test/Headers/__clang_hip_math.hip
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/include/llvm/IR/IntrinsicsNVVM.td
llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll
llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll
llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll
Removed:
################################################################################
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index a5132c9114673..80b585513f71a 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -948,7 +948,7 @@ void test_read_exec(global ulong* out) {
*out = __builtin_amdgcn_read_exec();
}
-// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1){{.*}} #[[$NOUNWIND_READONLY:[0-9]+]]
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1){{.*}} #[[$NOUNWIND_READONLY_NOPOISON:[0-9]+]]
// CHECK-LABEL: @test_read_exec_lo(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ballot.i32(i1 true)
@@ -956,7 +956,7 @@ void test_read_exec_lo(global uint* out) {
*out = __builtin_amdgcn_read_exec_lo();
}
-// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1){{.*}} #[[$NOUNWIND_READONLY:[0-9]+]]
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1){{.*}} #[[$NOUNWIND_READONLY_NOPOISON:[0-9]+]]
// CHECK-LABEL: @test_read_exec_hi(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.ballot.i64(i1 true)
@@ -1299,4 +1299,4 @@ void test_set_fpenv(unsigned long env) {
// CHECK-DAG: [[$GRID_RANGE]] = !{i32 1, i32 0}
// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
-// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) }
+// CHECK-DAG: attributes #[[$NOUNWIND_READONLY_NOPOISON]] = { convergent mustprogress nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 426e5af319cbf..42625ddcf2d31 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -8914,7 +8914,7 @@ extern "C" __device__ float test___sinf(float x) {
return __sinf(x);
}
-// DEFAULT-LABEL: define dso_local float @test___tanf(
+// DEFAULT-LABEL: define dso_local noundef float @test___tanf(
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR15]]
@@ -8932,7 +8932,7 @@ extern "C" __device__ float test___sinf(float x) {
// FINITEONLY-NEXT: [[MUL_I:%.*]] = fmul nnan ninf contract float [[CALL_I3_I]], [[TMP0]]
// FINITEONLY-NEXT: ret float [[MUL_I]]
//
-// APPROX-LABEL: define dso_local float @test___tanf(
+// APPROX-LABEL: define dso_local noundef float @test___tanf(
// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR15]]
@@ -8941,7 +8941,7 @@ extern "C" __device__ float test___sinf(float x) {
// APPROX-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
// APPROX-NEXT: ret float [[MUL_I]]
//
-// NCRDIV-LABEL: define dso_local float @test___tanf(
+// NCRDIV-LABEL: define dso_local noundef float @test___tanf(
// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR6]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR15]]
@@ -8950,7 +8950,7 @@ extern "C" __device__ float test___sinf(float x) {
// NCRDIV-NEXT: [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
// NCRDIV-NEXT: ret float [[MUL_I]]
//
-// AMDGCNSPIRV-LABEL: define spir_func float @test___tanf(
+// AMDGCNSPIRV-LABEL: define spir_func noundef float @test___tanf(
// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR6]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[CALL_I3_I:%.*]] = tail call contract spir_func noundef addrspace(4) float @__ocml_native_sin_f32(float noundef [[X]]) #[[ATTR15]]
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 35a4158a56da9..c6ad37d363d63 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -754,6 +754,19 @@ class MSBuiltin<string name> {
string MSBuiltinName = name;
}
+/// Utility class for intrinsics that
+/// 1. Don't touch memory or any hidden state
+/// 2. Can be freely speculated, and
+/// 3. Will not create undef or poison on defined inputs.
+class PureIntrinsic<list<LLVMType> ret_types,
+ list<LLVMType> param_types = [],
+ list<IntrinsicProperty> intr_properties = [],
+ string name = "",
+ list<SDNodeProperty> sd_properties = []>
+ : DefaultAttrsIntrinsic<ret_types, param_types,
+ intr_properties # [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison],
+ name, sd_properties>;
+
#ifndef TEST_INTRINSICS_SUPPRESS_DEFS
//===--------------- Variable Argument Handling Intrinsics ----------------===//
@@ -1940,8 +1953,7 @@ def int_fake_use : DefaultAttrsIntrinsic<[], [llvm_vararg_ty],
// Intrinsic to mask out bits of a pointer.
// First argument must be pointer or vector of pointer. This is checked by the
// verifier.
-def int_ptrmask: DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_anyint_ty],
- [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison]>;
+def int_ptrmask: PureIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_anyint_ty]>;
// Intrinsic to wrap a thread local variable.
def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>],
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 2afe89357a991..d951265fcae42 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -397,47 +397,42 @@ def int_amdgcn_s_wait_loadcnt : AMDGPUWaitIntrinsic;
def int_amdgcn_s_wait_samplecnt : AMDGPUWaitIntrinsic;
def int_amdgcn_s_wait_storecnt : AMDGPUWaitIntrinsic;
-def int_amdgcn_div_scale : DefaultAttrsIntrinsic<
+def int_amdgcn_div_scale : PureIntrinsic<
// 1st parameter: Numerator
// 2nd parameter: Denominator
// 3rd parameter: Select quotient. Must equal Numerator or Denominator.
// (0 = Denominator, 1 = Numerator).
[llvm_anyfloat_ty, llvm_i1_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
+ [ImmArg<ArgIndex<2>>]
>;
-def int_amdgcn_div_fmas : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_div_fmas : PureIntrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty]
>;
-def int_amdgcn_div_fixup : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
->;
+def int_amdgcn_div_fixup : PureIntrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
// Look Up 2.0 / pi src0 with segment select src1[4:0]
-def int_amdgcn_trig_preop : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_trig_preop : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]
>;
-def int_amdgcn_sin : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_sin : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
-def int_amdgcn_cos : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cos : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
// v_log_{f16|f32}, performs log2. f32 version does not handle
// denormals. There is no reason to use this for f16 as it does
// support denormals, and the generic log2 intrinsic should be
// preferred.
-def int_amdgcn_log : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_log : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
// v_exp_{f16|f32} (int_amdgcn_exp was taken by export
@@ -445,17 +440,17 @@ def int_amdgcn_log : DefaultAttrsIntrinsic<
// denormals. There is no reason to use this for f16 as it does
// support denormals, and the generic exp2 intrinsic should be
// preferred.
-def int_amdgcn_exp2 : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_exp2 : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
-def int_amdgcn_log_clamp : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_log_clamp : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]
+ PureIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [Commutative]
>;
// Fused single-precision multiply-add with legacy behaviour for the multiply,
@@ -464,129 +459,144 @@ def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">,
// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
// has a completely
// different kind of legacy behaviour.)
def int_amdgcn_fma_legacy :
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]
+ PureIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [Commutative]
>;
-def int_amdgcn_rcp : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_rcp : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_rcp_legacy : ClangBuiltin<"__builtin_amdgcn_rcp_legacy">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_float_ty], [llvm_float_ty]
>;
-def int_amdgcn_sqrt : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_sqrt : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
-def int_amdgcn_rsq : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_rsq : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_rsq_legacy : ClangBuiltin<"__builtin_amdgcn_rsq_legacy">,
- DefaultAttrsIntrinsic<
- [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<
+ [llvm_float_ty], [llvm_float_ty]
>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
-def int_amdgcn_rsq_clamp : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
+def int_amdgcn_rsq_clamp : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
+>;
-def int_amdgcn_frexp_mant : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_frexp_mant : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
-def int_amdgcn_frexp_exp : DefaultAttrsIntrinsic<
- [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_frexp_exp : PureIntrinsic<
+ [llvm_anyint_ty], [llvm_anyfloat_ty]
>;
// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
-def int_amdgcn_fract : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_fract : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
>;
def int_amdgcn_cvt_pkrtz : ClangBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
- DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cvt_pknorm_i16 :
ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
- DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cvt_pknorm_u16 :
ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
- DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cvt_pk_i16 :
ClangBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
- DefaultAttrsIntrinsic<
- [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<
+ [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty]
>;
def int_amdgcn_cvt_pk_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
- DefaultAttrsIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty]
>;
-def int_amdgcn_class : DefaultAttrsIntrinsic<
- [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_class : PureIntrinsic<
+ [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty]
>;
def int_amdgcn_fmed3 :
- DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]
>;
def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">,
- DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cubema : ClangBuiltin<"__builtin_amdgcn_cubema">,
- DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cubesc : ClangBuiltin<"__builtin_amdgcn_cubesc">,
- DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty]
>;
def int_amdgcn_cubetc : ClangBuiltin<"__builtin_amdgcn_cubetc">,
- DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty]
>;
// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
- DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]
>;
// v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support.
def int_amdgcn_fmad_ftz :
- DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]
+ PureIntrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]
>;
+def int_amdgcn_tanh : PureIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>]
+>;
+
+def int_amdgcn_cvt_sr_pk_f16_f32 : PureIntrinsic<
+ [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
+
+def int_amdgcn_cvt_sr_pk_bf16_f32 : PureIntrinsic<
+ [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
+
+def int_amdgcn_cvt_pk_f16_fp8 : PureIntrinsic<
+ [llvm_v2f16_ty], [llvm_i16_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
+
+def int_amdgcn_cvt_pk_f16_bf8 : PureIntrinsic<
+ [llvm_v2f16_ty], [llvm_i16_ty]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
+
+def int_amdgcn_cvt_pk_fp8_f16
+ : PureIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
+
+def int_amdgcn_cvt_pk_bf8_f16
+ : PureIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
+
// FIXME: The m0 argument should be moved after the normal arguments
class AMDGPUDSOrderedIntrinsic : Intrinsic<
[llvm_i32_ty],
@@ -625,62 +635,34 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
-def int_amdgcn_tanh : DefaultAttrsIntrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
->;
-
-def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic<
- [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">;
-
-def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic<
- [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">;
-
-def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
- [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
-
-def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
- [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
->, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
-
-def int_amdgcn_cvt_pk_fp8_f16
- : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
- [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f16">;
-
-def int_amdgcn_cvt_pk_bf8_f16
- : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
- [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f16">;
-
// llvm.amdgcn.cvt.sr.fp8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
// byte_sel selects byte to write in vdst.
-def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic<
+def int_amdgcn_cvt_sr_fp8_f16 : PureIntrinsic<
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
+ [ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">;
// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
// byte_sel selects byte to write in vdst.
-def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic<
+def int_amdgcn_cvt_sr_bf8_f16 : PureIntrinsic<
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
+ [ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">;
+// Note: these gfx1250 intrinsics are convergent because they read scales from other lanes.
// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..15]
class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
[DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 16>]
+ [IntrNoMem, IntrConvergent, IntrWillReturn,IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 16>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : PureIntrinsic<
+ [DstTy], [Src0Ty, llvm_float_ty]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+class AMDGPUCvtScaleF32SRIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : PureIntrinsic<
+ [DstTy], [Src0Ty, llvm_i32_ty, llvm_float_ty]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
def int_amdgcn_cvt_scale_pk8_f16_fp8 : AMDGPUCvtScaleIntrinsic<llvm_v8f16_ty, llvm_v2i32_ty, "cvt_scale_pk8_f16_fp8">;
@@ -699,8 +681,8 @@ def int_amdgcn_cvt_scale_pk16_bf16_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty
def int_amdgcn_cvt_scale_pk16_f32_fp6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_fp6">;
def int_amdgcn_cvt_scale_pk16_f32_bf6 : AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty, llvm_v3i32_ty, "cvt_scale_pk16_f32_bf6">;
-class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic<
- [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
+class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMType Src1Ty, string name> : PureIntrinsic<
+ [DstTy], [Src0Ty, Src1Ty, llvm_float_ty]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_fp6_f16">;
@@ -748,88 +730,87 @@ def int_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm
def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_fp6_f32">;
def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_bf6_f32">;
-class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : PureIntrinsic<
[DstTy],
[llvm_i32_ty, // src
llvm_float_ty, // scale
llvm_i32_ty], // src_sel index [0..3]
- [IntrNoMem,
- ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 4>]
+ [ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic<LLVMType DstTy, string name> : PureIntrinsic<
[DstTy],
[llvm_i32_ty, // src
llvm_float_ty, // scale
llvm_i1_ty], // src_lo_hi_sel[true false]
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
+ [ImmArg<ArgIndex<2>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleF16BF16ToFP8BF8TiedInputIntrinsic<LLVMType SrcTy, string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScaleF16BF16ToFP8BF8TiedInputIntrinsic<LLVMType SrcTy, string name> : PureIntrinsic<
[llvm_v2i16_ty],
[llvm_v2i16_ty, // old_vdst
SrcTy, // src
llvm_float_ty, // scale
llvm_i1_ty], // dst_lo_hi_sel[true false]
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleF32ToFP8BF8TiedInputIntrinsic<string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScaleF32ToFP8BF8TiedInputIntrinsic<string name> : PureIntrinsic<
[llvm_v2i16_ty],
[llvm_v2i16_ty, // old_vdst
llvm_float_ty, // src0
llvm_float_ty, // src1
llvm_float_ty, // scale
llvm_i1_ty], // dst_lo_hi_sel[true false]
- [IntrNoMem, ImmArg<ArgIndex<4>>]
+ [ImmArg<ArgIndex<4>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic<LLVMType DstTy, string name> :PureIntrinsic<
[DstTy],
[llvm_v2f16_ty, // old_vdst
llvm_i32_ty, // src
llvm_float_ty, // scale
llvm_i32_ty, // src_sel_index[0..3]
llvm_i1_ty], // dst_lo_hi_sel[true false]
- [IntrNoMem, ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>, ImmArg<ArgIndex<4>>]
+ [ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>, ImmArg<ArgIndex<4>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleF32ToFP4Intrinsic<string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScaleF32ToFP4Intrinsic<string name> : PureIntrinsic<
[llvm_i32_ty],
[llvm_i32_ty, // old_vdst
llvm_float_ty, // src0
llvm_float_ty, // src1
llvm_float_ty, // scale
llvm_i32_ty], // dst_sel_index[0..3]
- [IntrNoMem, ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
+ [ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic<LLVMType SrcTy, string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic<LLVMType SrcTy, string name> : PureIntrinsic<
[llvm_i32_ty],
[llvm_i32_ty, // old_vdst
SrcTy, // src
llvm_float_ty, // scale
llvm_i32_ty], // dest_sel_index [0..3]
- [IntrNoMem, ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
+ [ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<LLVMType Src0Ty, string name> : PureIntrinsic<
[llvm_i32_ty],
[llvm_i32_ty, // old_vdst
Src0Ty, // src0
llvm_i32_ty, // seed
llvm_float_ty, // scale
llvm_i32_ty], // dst_sel_index[0..3]
- [IntrNoMem, ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
+ [ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
-class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
+class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<LLVMType DstTy, string name> : PureIntrinsic<
[DstTy],
[DstTy, // old_vdst
llvm_float_ty, // src0
llvm_i32_ty, // seed
llvm_i1_ty], // dst_lo_hi_sel[true false]
- [IntrNoMem, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
def int_amdgcn_cvt_sr_bf16_f32: AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<llvm_v2bf16_ty, "cvt_sr_bf16_f32">;
@@ -898,13 +879,13 @@ def int_amdgcn_cvt_scalef32_sr_fp8_f16: AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8Tied
def int_amdgcn_cvt_scalef32_sr_fp8_f32: AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<llvm_float_ty, "cvt_scalef32_sr_fp8_f32">;
def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
- [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]
+ [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrNoCreateUndefOrPoison]
>, ClangBuiltin<"__builtin_amdgcn_prng_b32">;
def int_amdgcn_bitop3 :
- DefaultAttrsIntrinsic<[llvm_anyint_ty],
+ PureIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
+ [ImmArg<ArgIndex<3>>]>;
} // TargetPrefix = "amdgcn"
@@ -1435,15 +1416,14 @@ let TargetPrefix = "amdgcn" in {
// resources, it can create the fat pointers ptr addrspace(7) and ptr addrspace(9),
// which carry additional offset bits. When this intrinsic is used to create
// these fat pointers, their offset and index fields (if applicable) are zero.
-def int_amdgcn_make_buffer_rsrc : DefaultAttrsIntrinsic <
+def int_amdgcn_make_buffer_rsrc : PureIntrinsic <
[llvm_anyptr_ty],
[llvm_anyptr_ty, // base
llvm_i16_ty, // stride (and swizzle control)
llvm_i64_ty, // NumRecords / extent
llvm_i32_ty], // flags
// Attributes lifted from ptrmask + some extra argument attributes.
- [IntrNoMem, ReadNone<ArgIndex<0>>,
- IntrSpeculatable]>;
+ [ReadNone<ArgIndex<0>>]>;
defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
@@ -2364,12 +2344,12 @@ def int_amdgcn_live_mask : DefaultAttrsIntrinsic <[llvm_i1_ty],
def int_amdgcn_mbcnt_lo :
ClangBuiltin<"__builtin_amdgcn_mbcnt_lo">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrNoCreateUndefOrPoison]>;
def int_amdgcn_mbcnt_hi :
ClangBuiltin<"__builtin_amdgcn_mbcnt_hi">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrNoCreateUndefOrPoison]>;
// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
@@ -2378,69 +2358,47 @@ def int_amdgcn_ds_swizzle :
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree,
ImmArg<ArgIndex<1>>]>;
-def int_amdgcn_ubfe : DefaultAttrsIntrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+def int_amdgcn_ubfe : PureIntrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty]>;
-def int_amdgcn_sbfe : DefaultAttrsIntrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+def int_amdgcn_sbfe : PureIntrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty]>;
def int_amdgcn_lerp :
ClangBuiltin<"__builtin_amdgcn_lerp">,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_amdgcn_sad_u8 :
ClangBuiltin<"__builtin_amdgcn_sad_u8">,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_amdgcn_msad_u8 :
ClangBuiltin<"__builtin_amdgcn_msad_u8">,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_amdgcn_sad_hi_u8 :
ClangBuiltin<"__builtin_amdgcn_sad_hi_u8">,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_amdgcn_sad_u16 :
ClangBuiltin<"__builtin_amdgcn_sad_u16">,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_amdgcn_qsad_pk_u16_u8 :
ClangBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
- DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty]>;
def int_amdgcn_mqsad_pk_u16_u8 :
ClangBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
- DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty]>;
def int_amdgcn_mqsad_u32_u8 :
ClangBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
- DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty]>;
def int_amdgcn_cvt_pk_u8_f32 :
ClangBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+ PureIntrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_amdgcn_icmp :
Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
@@ -2456,7 +2414,7 @@ def int_amdgcn_fcmp :
// in all active lanes, and zero in all inactive lanes.
def int_amdgcn_ballot :
Intrinsic<[llvm_anyint_ty], [llvm_i1_ty],
- [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]>;
// Inverse of ballot: return the bit corresponding to the current lane from the
// given mask.
@@ -2464,7 +2422,7 @@ def int_amdgcn_ballot :
// This is only defined for dynamically uniform masks and therefore convergent.
def int_amdgcn_inverse_ballot :
Intrinsic<[llvm_i1_ty], [llvm_anyint_ty],
- [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]>;
// Lowers to S_BITREPLICATE_B64_B32.
// The argument must be uniform; otherwise, the result is undefined.
@@ -2503,7 +2461,7 @@ defm int_amdgcn_wave_reduce_ : AMDGPUWaveReduceOps;
def int_amdgcn_readfirstlane :
Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],
- [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]>;
// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
@@ -2530,24 +2488,20 @@ def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">,
// mul24 intrinsics can return i32 or i64.
// When returning i64, they're lowered to a mul24/mulhi24 pair.
-def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_anyint_ty],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_mul_i24 : PureIntrinsic<[llvm_anyint_ty],
+ [llvm_i32_ty, llvm_i32_ty]
>;
-def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_anyint_ty],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_mul_u24 : PureIntrinsic<[llvm_anyint_ty],
+ [llvm_i32_ty, llvm_i32_ty]
>;
-def int_amdgcn_mulhi_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_mulhi_i24 : PureIntrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty]
>;
-def int_amdgcn_mulhi_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_mulhi_u24 : PureIntrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty]
>;
// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
@@ -2686,14 +2640,14 @@ def int_amdgcn_set_inactive_chain_arg :
// Return if the given flat pointer points to a local memory address.
def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">,
- DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
- [IntrNoMem, IntrSpeculatable] // FIXME: This should be captures(ret: address)
+ PureIntrinsic<[llvm_i1_ty], [llvm_ptr_ty]
+ // FIXME: This should be captures(ret: address)
>;
// Return if the given flat pointer points to a prvate memory address.
def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">,
- DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
- [IntrNoMem, IntrSpeculatable] // FIXME: This should be captures(ret: address)
+ PureIntrinsic<[llvm_i1_ty], [llvm_ptr_ty]
+ // FIXME: This should be captures(ret: address)
>;
// A uniform tail call to a function with the `amdgpu_cs_chain` or
@@ -2958,7 +2912,8 @@ class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> :
LLVMMatchType<1>, // %B
LLVMMatchType<0>, // %C
],
- [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree,
+ IntrNoCreateUndefOrPoison]
>;
class AMDGPUWmmaIntrinsicOPSEL<LLVMType AB, LLVMType CD> :
@@ -2970,7 +2925,8 @@ class AMDGPUWmmaIntrinsicOPSEL<LLVMType AB, LLVMType CD> :
LLVMMatchType<0>, // %C
llvm_i1_ty, // %high (op_sel) for GFX11, 0 for GFX12
],
- [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
+ [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree,
+ IntrNoCreateUndefOrPoison]
>;
class AMDGPUWmmaIntrinsicIU<LLVMType AB, LLVMType CD> :
@@ -2984,7 +2940,8 @@ class AMDGPUWmmaIntrinsicIU<LLVMType AB, LLVMType CD> :
LLVMMatchType<0>, // %C
llvm_i1_ty, // %clamp
],
- [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
+ [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree,
+ IntrNoCreateUndefOrPoison]
>;
// WMMA GFX11Only
@@ -3051,7 +3008,7 @@ def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
Intrinsic<[llvm_i32_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn,
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
+ ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]>;
// llvm.amdgcn.permlanex16.var <old> <src0> <src1> <fi> <bound_control>
def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var">,
@@ -3077,7 +3034,8 @@ class AMDGPUSWmmacIntrinsicIdxReuse<LLVMType A, LLVMType B, LLVMType CD, LLVMTyp
llvm_i1_ty, // matrix_a_reuse
llvm_i1_ty, // matrix_b_reuse
],
- [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]
>;
class AMDGPUSWmmacIntrinsicIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
@@ -3089,7 +3047,7 @@ class AMDGPUSWmmacIntrinsicIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Ind
LLVMMatchType<0>, // %C
Index // %Sparsity index for A
],
- [IntrNoMem, IntrConvergent, IntrWillReturn]
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison]
>;
class AMDGPUSWmmacIntrinsicIUIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
@@ -3104,7 +3062,7 @@ class AMDGPUSWmmacIntrinsicIUIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType I
Index, // %Sparsity index for A
llvm_i1_ty, // %clamp
],
- [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>]
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>]
>;
defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX12 = {
@@ -3214,7 +3172,7 @@ def int_amdgcn_global_prefetch : ClangBuiltin<"__builtin_amdgcn_global_prefetch"
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
ClangBuiltin<"__builtin_amdgcn_fdot2">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_float_ty], // %r
[
llvm_v2f16_ty, // %a
@@ -3222,42 +3180,40 @@ def int_amdgcn_fdot2 :
llvm_float_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// f16 %r = llvm.amdgcn.fdot2.f16.f16(v2f16 %a, v2f16 %b, f16 %c)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2_f16_f16 :
ClangBuiltin<"__builtin_amdgcn_fdot2_f16_f16">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_half_ty], // %r
[
llvm_v2f16_ty, // %a
llvm_v2f16_ty, // %b
llvm_half_ty // %c
- ],
- [IntrNoMem, IntrSpeculatable]
+ ]
>;
// bf16 %r = llvm.amdgcn.fdot2.bf16.bf16(v2bf16 %a, v2bf16 %b, bf16 %c)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2_bf16_bf16 :
ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_bfloat_ty], // %r
[
llvm_v2bf16_ty, // %a
llvm_v2bf16_ty, // %b
llvm_bfloat_ty // %c
- ],
- [IntrNoMem, IntrSpeculatable]
+ ]
>;
// f32 %r = llvm.amdgcn.fdot2.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2_f32_bf16 :
ClangBuiltin<"__builtin_amdgcn_fdot2_f32_bf16">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_float_ty], // %r
[
llvm_v2bf16_ty, // %a
@@ -3265,7 +3221,7 @@ def int_amdgcn_fdot2_f32_bf16 :
llvm_float_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// f32 %r = llvm.amdgcn.fdot2c.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp)
@@ -3275,7 +3231,7 @@ def int_amdgcn_fdot2_f32_bf16 :
def int_amdgcn_fdot2c_f32_bf16 :
ClangBuiltin<"__builtin_amdgcn_fdot2c_f32_bf16">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_float_ty], // %r
[
llvm_v2bf16_ty, // %a
@@ -3283,14 +3239,14 @@ def int_amdgcn_fdot2c_f32_bf16 :
llvm_float_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
ClangBuiltin<"__builtin_amdgcn_sdot2">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_i32_ty], // %r
[
llvm_v2i16_ty, // %a
@@ -3298,14 +3254,14 @@ def int_amdgcn_sdot2 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
ClangBuiltin<"__builtin_amdgcn_udot2">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_i32_ty], // %r
[
llvm_v2i16_ty, // %a
@@ -3313,14 +3269,14 @@ def int_amdgcn_udot2 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sdot4 :
ClangBuiltin<"__builtin_amdgcn_sdot4">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_i32_ty], // %r
[
llvm_i32_ty, // %a
@@ -3328,14 +3284,14 @@ def int_amdgcn_sdot4 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
ClangBuiltin<"__builtin_amdgcn_udot4">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_i32_ty], // %r
[
llvm_i32_ty, // %a
@@ -3343,7 +3299,7 @@ def int_amdgcn_udot4 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// i32 %r = llvm.amdgcn.sudot4(i1 %a_sign, v4i8 (as i32) %a, i1 %b_sign, v4i8 (as i32) %b, i32 %c, i1 %clamp)
@@ -3353,7 +3309,7 @@ def int_amdgcn_udot4 :
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sudot4 :
ClangBuiltin<"__builtin_amdgcn_sudot4">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_i32_ty], // %r
[
llvm_i1_ty, // %a_sign
@@ -3363,8 +3319,7 @@ def int_amdgcn_sudot4 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable,
- ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]
>;
// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
@@ -3372,7 +3327,7 @@ def int_amdgcn_sudot4 :
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sdot8 :
ClangBuiltin<"__builtin_amdgcn_sdot8">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_i32_ty], // %r
[
llvm_i32_ty, // %a
@@ -3380,7 +3335,7 @@ def int_amdgcn_sdot8 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
@@ -3388,7 +3343,7 @@ def int_amdgcn_sdot8 :
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
ClangBuiltin<"__builtin_amdgcn_udot8">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_i32_ty], // %r
[
llvm_i32_ty, // %a
@@ -3396,7 +3351,7 @@ def int_amdgcn_udot8 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
+ [ImmArg<ArgIndex<3>>]
>;
// i32 %r = llvm.amdgcn.sudot8(i1 %a_sign, v8i4 (as i32) %a, i1 %b_sign, v8i4 (as i32) %b, i32 %c, i1 %clamp)
@@ -3407,7 +3362,7 @@ def int_amdgcn_udot8 :
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sudot8 :
ClangBuiltin<"__builtin_amdgcn_sudot8">,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_i32_ty], // %r
[
llvm_i1_ty, // %a_sign
@@ -3417,22 +3372,20 @@ def int_amdgcn_udot8 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable,
- ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]
>;
// f32 %r = llvm.amdgcn.dot4.f32.type_a.type_b (v4type_a (as i32) %a, v4type_b (as i32) %b, f32 %c)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
class AMDGPU8bitFloatDot4Intrinsic :
ClangBuiltin<!subst("int", "__builtin", NAME)>,
- DefaultAttrsIntrinsic<
+ PureIntrinsic<
[llvm_float_ty], // %r
[
llvm_i32_ty, // %a
llvm_i32_ty, // %b
llvm_float_ty, // %c
- ],
- [IntrNoMem, IntrSpeculatable]
+ ]
>;
def int_amdgcn_dot4_f32_fp8_bf8 : AMDGPU8bitFloatDot4Intrinsic;
@@ -3450,7 +3403,7 @@ class AMDGPUMfmaIntrinsic<LLVMType DestTy, LLVMType SrcABTy> :
DefaultAttrsIntrinsic<[DestTy],
[SrcABTy, SrcABTy, DestTy,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem,
+ [IntrConvergent, IntrNoMem, IntrNoCreateUndefOrPoison,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
@@ -3475,7 +3428,7 @@ class AMDGPUMfmaScaleIntrinsic<LLVMType DestTy> :
llvm_i32_ty, // op_sel (B matrix scale, 2-bits) // TODO: Make i2?
llvm_i32_ty // v_mfma_ld_scale_b32 src1 (B matrix scale)
],
- [IntrConvergent, IntrNoMem,
+ [IntrConvergent, IntrNoMem, IntrNoCreateUndefOrPoison,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<7>>
]>;
@@ -3539,7 +3492,7 @@ class AMDGPUMSmfmacIntrinsic<LLVMType DestTy, LLVMType SrcA, LLVMType SrcB> :
DefaultAttrsIntrinsic<[DestTy],
[SrcA, SrcB, DestTy, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrNoMem,
+ [IntrConvergent, IntrNoMem, IntrNoCreateUndefOrPoison,
ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
class AMDGPUMFp8SmfmacIntrinsic<LLVMType DestTy> :
@@ -3592,59 +3545,58 @@ def int_amdgcn_cvt_f32_fp8_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8_e5m
// llvm.amdgcn.cvt.pk.f32.bf8 float2 vdst, int srcA, imm word_sel
// word_sel = 1 selects 2 high bytes, 0 selects 2 low bytes.
def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">,
- DefaultAttrsIntrinsic<[llvm_v2f32_ty],
+ PureIntrinsic<[llvm_v2f32_ty],
[llvm_i32_ty, llvm_i1_ty],
- [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ [ImmArg<ArgIndex<1>>]>;
// llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel.
def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">,
- DefaultAttrsIntrinsic<[llvm_v2f32_ty],
+ PureIntrinsic<[llvm_v2f32_ty],
[llvm_i32_ty, llvm_i1_ty],
- [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ [ImmArg<ArgIndex<1>>]>;
// llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel
// word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes.
def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
+ PureIntrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ [ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel
def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
+ PureIntrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ [ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.pk.fp8.f32.e5m3 int vdst, float srcA, float srcB, int old, imm word_sel
def int_amdgcn_cvt_pk_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32_e5m3">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
+ PureIntrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ [ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
// byte_sel selects byte to write into vdst.
def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
+ PureIntrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ [ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.sr.fp8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
+ PureIntrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ [ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.sr.fp8.f32.e5m3 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
def int_amdgcn_cvt_sr_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32_e5m3">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
+ PureIntrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ [ImmArg<ArgIndex<3>>]>;
// llvm.amdgcn.cvt.off.fp32.i4 int srcA
def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">,
- DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ PureIntrinsic<[llvm_float_ty],
+ [llvm_i32_ty]>;
//===----------------------------------------------------------------------===//
// gfx950 intrinsics
@@ -3691,13 +3643,11 @@ def int_amdgcn_permlane32_swap :
// llvm.amdgcn.ashr_pk_i8_i32 int vdst, int src0, int src1 int src2
def int_amdgcn_ashr_pk_i8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_i8_i32">,
- DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ PureIntrinsic<[llvm_i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
// llvm.amdgcn.ashr_pk_u8_i32 int vdst, int src0, int src1 int src2
def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">,
- DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ PureIntrinsic<[llvm_i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
//===----------------------------------------------------------------------===//
// gfx1250 intrinsics
@@ -3745,15 +3695,15 @@ def int_amdgcn_cvt_f16_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_fp8">,
// llvm.amdgcn.cvt.f16.bf8 half vdst, int srcA, imm byte_sel [0..3]
def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">,
- DefaultAttrsIntrinsic<[llvm_half_ty],
+ PureIntrinsic<[llvm_half_ty],
[llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ [ImmArg<ArgIndex<1>>]>;
def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">,
- DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
+ PureIntrinsic<[llvm_i16_ty], [llvm_i32_ty]>;
def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">,
- DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
+ PureIntrinsic<[llvm_i16_ty], [llvm_i32_ty]>;
// llvm.amdgcn.permlane.bcast <src0> <src1> <src2>
def int_amdgcn_permlane_bcast : ClangBuiltin<"__builtin_amdgcn_permlane_bcast">,
@@ -3786,20 +3736,17 @@ def int_amdgcn_permlane_idx_gen : ClangBuiltin<"__builtin_amdgcn_permlane_idx_ge
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
def int_amdgcn_perm_pk16_b4_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b4_u4">,
- DefaultAttrsIntrinsic<[llvm_v2i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ PureIntrinsic<[llvm_v2i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty]>;
def int_amdgcn_perm_pk16_b6_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b6_u4">,
- DefaultAttrsIntrinsic<[llvm_v3i32_ty], [llvm_i32_ty, llvm_i64_ty, llvm_v2i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ PureIntrinsic<[llvm_v3i32_ty], [llvm_i32_ty, llvm_i64_ty, llvm_v2i32_ty]>;
def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4">,
- DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ PureIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty]>;
class AMDGPUAddMinMax<LLVMType Ty, string Name> : ClangBuiltin<"__builtin_amdgcn_"#Name>,
- DefaultAttrsIntrinsic<[Ty], [Ty, Ty, Ty, llvm_i1_ty /* clamp */],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
+ PureIntrinsic<[Ty], [Ty, Ty, Ty, llvm_i1_ty /* clamp */],
+ [ImmArg<ArgIndex<3>>]>;
def int_amdgcn_add_max_i32 : AMDGPUAddMinMax<llvm_i32_ty, "add_max_i32">;
def int_amdgcn_add_max_u32 : AMDGPUAddMinMax<llvm_i32_ty, "add_max_u32">;
@@ -3883,10 +3830,8 @@ def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent, IntrNoCallback,
// Emit 2.5 ulp, no denormal division. Should only be inserted by
// pass based on !fpmath metadata.
-def int_amdgcn_fdiv_fast : DefaultAttrsIntrinsic<
- [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
->;
+def int_amdgcn_fdiv_fast : PureIntrinsic<
+ [llvm_float_ty], [llvm_float_ty, llvm_float_ty]>;
// Async instructions increment ASYNCcnt which is modeled as InaccessibleMem.
class AMDGPUAsyncClusterLoadLDS : Intrinsic <
@@ -3971,7 +3916,7 @@ class AMDGPUWmmaIntrinsicModsAB<LLVMType AB, LLVMType CD> :
llvm_i1_ty, // %clamp
],
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
- IntrWillReturn, IntrNoCallback, IntrNoFree]
+ IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]
>;
class AMDGPUWmmaIntrinsicModsC<LLVMType AB, LLVMType CD> :
@@ -3986,7 +3931,7 @@ class AMDGPUWmmaIntrinsicModsC<LLVMType AB, LLVMType CD> :
llvm_i1_ty, // matrix_b_reuse
],
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>,
- IntrWillReturn, IntrNoCallback, IntrNoFree]
+ IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]
>;
class AMDGPUWmmaIntrinsicF4ModsC<LLVMType A, LLVMType B, LLVMType CD> :
@@ -3998,7 +3943,9 @@ class AMDGPUWmmaIntrinsicF4ModsC<LLVMType A, LLVMType B, LLVMType CD> :
llvm_i16_ty, // %C_mod: 0 - none, 1 - neg, 2 - abs, 3 - neg(abs)
LLVMMatchType<0>, // %C
],
- [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<2>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
+ [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<2>>,
+ IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison
+ ]
>;
class AMDGPUWmmaIntrinsicModsAll<LLVMType AB, LLVMType CD> :
@@ -4085,7 +4032,7 @@ class AMDGPUWmmaScaleIntrinsicModsC<LLVMType scale_ty> :
],
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>,
ImmArg<ArgIndex<7>>, ImmArg<ArgIndex<9>>, ImmArg<ArgIndex<10>>, ImmArg<ArgIndex<12>>, ImmArg<ArgIndex<13>>,
- IntrWillReturn, IntrNoCallback, IntrNoFree]
+ IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]
>;
class AMDGPUWmmaScaleF4IntrinsicModsC<LLVMType scale_ty> :
@@ -4107,7 +4054,7 @@ class AMDGPUWmmaScaleF4IntrinsicModsC<LLVMType scale_ty> :
],
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<7>>,
ImmArg<ArgIndex<8>>, ImmArg<ArgIndex<10>>, ImmArg<ArgIndex<11>>,
- IntrWillReturn, IntrNoCallback, IntrNoFree]
+ IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]
>;
defset list<Intrinsic> AMDGPUWMMAIntrinsicsGFX1250 = {
@@ -4155,7 +4102,8 @@ class AMDGPUSWmmacIntrinsicABIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType I
llvm_i1_ty, // matrix_a_reuse
llvm_i1_ty, // matrix_b_reuse
],
- [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>]
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison,
+ ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>]
>;
defset list<Intrinsic> AMDGPUSWMMACIntrinsicsGFX1250 = {
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 6daad1e7b05ef..bad655c7c4d1c 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
// Guidelines on NVPTX Intrinsic design
//===----------------------------------------------------------------------===//
-//
+//
// The NVPTX intrinsics are used to model instructions in the PTX ISA.
// While simpler intrinsics can represent certain features effectively,
// more complex instructions like TMA and MMA are not as straightforward
@@ -36,15 +36,15 @@
//
// Below are a set of guidelines that may help in choosing
// an appropriate design for the complex intrinsics:
-//
+//
// 1. Each flag argument represents one set of instruction modifiers.
// These flags are compile-time integer constants.
-//
+//
// 2. When an intrinsic uses flags, document it with details of the
// flag usage in the ``NVPTXUsage.rst`` file.
// 3. Annotate all flag arguments with ImmArg<ArgIdx<>>.
// 4. Place the flag arguments at the end of the (actual)argument list.
-//
+//
// 5. Use `i1` for boolean flags and `i8` for others. Usually,
// the `i8` types represent an `enum` encoding the family of
// modifiers.
@@ -60,14 +60,14 @@
// first-order information in the intrinsic name while using flags
// for supplementary details improves readability.
// For example:
-//
+//
// i. For MMA intrinsics, 'dense' vs. 'sparse' is a fundamental feature,
// whereas an optional scaling applied to matrices is relatively secondary.
-//
+//
// ii. For TMAs, the mode of copy (e.g., 'Tile' or 'Im2col') is a first-order
// information, while features like an optional cache hint tend to be
// secondary.
-//
+//
// 8. If there are invalid combinations within a set of modifiers, avoid
// encoding them as flags, as much as possible. This helps reduce the
// need for error handling of unsupported cases in the backend.
@@ -77,13 +77,13 @@
// 9. Similarly, when there are invalid combinations across a set of
// modifiers, avoid encoding them as flags to prevent additional
// complexity in error handling.
-//
+//
// 10. Maintain a consistent design within an intrinsic family, including
// argument ordering as well as the usage and ordering of flags.
// 11. When designing an intrinsic corresponding to an instruction or its variant,
// consider the entire instruction family. This may reveal common features
// that can be modelled consistently across the family.
-//
+//
// In summary, strive to balance the aspects mentioned above, to achieve
// a scalable design with maximum readability.
//===----------------------------------------------------------------------===//
@@ -1266,7 +1266,13 @@ class NVVMBuiltin :
"NVVMBuiltin must be a NVVM intrinsic starting with 'int_nvvm_'";
}
-class PureIntrinsic<list<LLVMType> ret_types,
+// Note(krzysz00): This class is named `NVVMPureIntrinsic` because the
+// `PureIntrinsic` class I added to `Intrinsics.td` also adds the
+// new `nocreateundeforpoison` property (which means that if the operands
+// to the intrinsic aren't undef/poison, the result won't be either). I don't know
+// the NVVM intrinsics and so can't update the annotations. Someone from Nvidia
+// should go through and update these (or swap back to `PureIntrinsic` wholesale).
+class NVVMPureIntrinsic<list<LLVMType> ret_types,
list<LLVMType> param_types = [],
list<IntrinsicProperty> intr_properties = [],
string name = ""> :
@@ -1279,18 +1285,18 @@ let TargetPrefix = "nvvm" in {
// PRMT - permute
//
def int_nvvm_prmt : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
foreach mode = ["f4e", "b4e"] in
def int_nvvm_prmt_ # mode :
- PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
// Note: these variants also have 2 source operands but only one will ever
// be used so we eliminate the other operand in the IR (0 is used as the
// placeholder in the backend).
foreach mode = ["rc8", "ecl", "ecr", "rc16"] in
def int_nvvm_prmt_ # mode :
- PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
//
// Nanosleep
@@ -1392,13 +1398,13 @@ let TargetPrefix = "nvvm" in {
//
foreach sign = ["", "u"] in {
def int_nvvm_sad_ # sign # s : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty]>;
def int_nvvm_sad_ # sign # i : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_nvvm_sad_ # sign # ll : NVVMBuiltin,
- PureIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>;
+ NVVMPureIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>;
}
//
@@ -1407,9 +1413,9 @@ let TargetPrefix = "nvvm" in {
foreach op = ["floor", "ceil"] in {
foreach ftz = ["", "_ftz"] in
def int_nvvm_ # op # ftz # _f : NVVMBuiltin,
- PureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
def int_nvvm_ # op # _d : NVVMBuiltin,
- PureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
}
//
@@ -1417,45 +1423,45 @@ let TargetPrefix = "nvvm" in {
//
foreach ftz = ["", "_ftz"] in
def int_nvvm_fabs # ftz :
- PureIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ NVVMPureIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
//
// Neg bf16, bf16x2
//
def int_nvvm_neg_bf16 : NVVMBuiltin,
- PureIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty]>;
+ NVVMPureIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty]>;
def int_nvvm_neg_bf16x2 : NVVMBuiltin,
- PureIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty]>;
+ NVVMPureIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty]>;
//
// Round
//
foreach ftz = ["", "_ftz"] in
def int_nvvm_round # ftz # _f : NVVMBuiltin,
- PureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
def int_nvvm_round_d : NVVMBuiltin,
- PureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
//
// Trunc
//
foreach ftz = ["", "_ftz"] in
def int_nvvm_trunc # ftz # _f : NVVMBuiltin,
- PureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
def int_nvvm_trunc_d : NVVMBuiltin,
- PureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
//
// Saturate
//
foreach ftz = ["", "_ftz"] in
def int_nvvm_saturate # ftz # _f : NVVMBuiltin,
- PureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_float_ty], [llvm_float_ty]>;
def int_nvvm_saturate_d : NVVMBuiltin,
- PureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_double_ty], [llvm_double_ty]>;
//
// Exp2 Log2
@@ -1487,19 +1493,19 @@ let TargetPrefix = "nvvm" in {
foreach variant = ["", "_sat", "_relu"] in {
foreach ftz = ["", "_ftz"] in {
def int_nvvm_fma_rn # ftz # variant # _f16 :
- PureIntrinsic<[llvm_half_ty],
+ NVVMPureIntrinsic<[llvm_half_ty],
[llvm_half_ty, llvm_half_ty, llvm_half_ty]>;
def int_nvvm_fma_rn # ftz # variant # _f16x2 :
- PureIntrinsic<[llvm_v2f16_ty],
+ NVVMPureIntrinsic<[llvm_v2f16_ty],
[llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty]>;
def int_nvvm_fma_rn # ftz # variant # _bf16 : NVVMBuiltin,
- PureIntrinsic<[llvm_bfloat_ty],
+ NVVMPureIntrinsic<[llvm_bfloat_ty],
[llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty]>;
def int_nvvm_fma_rn # ftz # variant # _bf16x2 : NVVMBuiltin,
- PureIntrinsic<[llvm_v2bf16_ty],
+ NVVMPureIntrinsic<[llvm_v2bf16_ty],
[llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty]>;
} // ftz
} // variant
@@ -1508,7 +1514,7 @@ let TargetPrefix = "nvvm" in {
foreach ftz = ["", "_ftz"] in {
foreach sat = ["", "_sat"] in {
def int_nvvm_fma # rnd # ftz # sat # _f : NVVMBuiltin,
- PureIntrinsic<[llvm_float_ty],
+ NVVMPureIntrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty]>;
} // sat
} // ftz
@@ -1591,10 +1597,10 @@ let TargetPrefix = "nvvm" in {
foreach a_type = ["s", "u"] in {
foreach b_type = ["s", "u"] in {
def int_nvvm_idp4a_ # a_type # _ # b_type :
- PureIntrinsic<[llvm_i32_ty],
+ NVVMPureIntrinsic<[llvm_i32_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_nvvm_idp2a_ # a_type # _ # b_type :
- PureIntrinsic<[llvm_i32_ty],
+ NVVMPureIntrinsic<[llvm_i32_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
}
@@ -1605,7 +1611,7 @@ let TargetPrefix = "nvvm" in {
//
foreach direction = ["l", "r"] in
def int_nvvm_fsh # direction # _clamp :
- PureIntrinsic<[llvm_anyint_ty],
+ NVVMPureIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
//
@@ -1613,7 +1619,7 @@ let TargetPrefix = "nvvm" in {
//
foreach sign = ["s", "u"] in
def int_nvvm_flo_ # sign :
- PureIntrinsic<[llvm_i32_ty], [llvm_anyint_ty, llvm_i1_ty],
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_anyint_ty, llvm_i1_ty],
[ImmArg<ArgIndex<1>>]>;
//
@@ -1622,94 +1628,94 @@ let TargetPrefix = "nvvm" in {
foreach ext = ["sext", "zext"] in
foreach mode = ["wrap", "clamp"] in
def int_nvvm_ # ext # _ # mode :
- PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
//
// BMSK - bit mask
//
foreach mode = ["wrap", "clamp"] in
def int_nvvm_bmsk_ # mode :
- PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
//
// FNS - Find the n-th set bit
//
def int_nvvm_fns : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
//
// Convert
//
- // TODO: All these intrinsics are defined as PureIntrinsic, this attaches the
+ // TODO: All these intrinsics are defined as NVVMPureIntrinsic, this attaches the
// IntrSpeculatable property to them. Consider if some of these should
// have this attribute removed as they may be too expensive.
//
def int_nvvm_lohi_i2d : NVVMBuiltin,
- PureIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty]>;
def int_nvvm_d2i_lo : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
def int_nvvm_d2i_hi : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
foreach rnd = ["rn", "rz", "rm", "rp"] in {
foreach ftz = ["", "_ftz"] in
def int_nvvm_d2f_ # rnd # ftz : NVVMBuiltin,
- PureIntrinsic<[llvm_float_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_float_ty], [llvm_double_ty]>;
foreach sign = ["", "u"] in {
def int_nvvm_d2 # sign # i_ # rnd : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_double_ty]>;
def int_nvvm_ # sign # i2d_ # rnd : NVVMBuiltin,
- PureIntrinsic<[llvm_double_ty], [llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_double_ty], [llvm_i32_ty]>;
foreach ftz = ["", "_ftz"] in
def int_nvvm_f2 # sign # i_ # rnd # ftz : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
def int_nvvm_ # sign # i2f_ # rnd : NVVMBuiltin,
- PureIntrinsic<[llvm_float_ty], [llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_float_ty], [llvm_i32_ty]>;
foreach ftz = ["", "_ftz"] in
def int_nvvm_f2 # sign # ll_ # rnd # ftz : NVVMBuiltin,
- PureIntrinsic<[llvm_i64_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i64_ty], [llvm_float_ty]>;
def int_nvvm_d2 # sign # ll_ # rnd : NVVMBuiltin,
- PureIntrinsic<[llvm_i64_ty], [llvm_double_ty]>;
+ NVVMPureIntrinsic<[llvm_i64_ty], [llvm_double_ty]>;
def int_nvvm_ # sign # ll2f_ # rnd : NVVMBuiltin,
- PureIntrinsic<[llvm_float_ty], [llvm_i64_ty]>;
+ NVVMPureIntrinsic<[llvm_float_ty], [llvm_i64_ty]>;
def int_nvvm_ # sign # ll2d_ # rnd : NVVMBuiltin,
- PureIntrinsic<[llvm_double_ty], [llvm_i64_ty]>;
+ NVVMPureIntrinsic<[llvm_double_ty], [llvm_i64_ty]>;
} // sign
} // rnd
foreach ftz = ["", "_ftz"] in {
def int_nvvm_f2h_rn # ftz : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_float_ty]>;
def int_nvvm_bf2h_rn # ftz : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty]>;
}
foreach rnd = ["rn", "rz"] in {
foreach relu = ["", "_relu"] in {
foreach satfinite = ["", "_satfinite"] in {
def int_nvvm_ff2bf16x2_ # rnd # relu # satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
def int_nvvm_ff2f16x2_ # rnd # relu # satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
def int_nvvm_f2bf16_ # rnd # relu # satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
-
+ NVVMPureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
+
def int_nvvm_f2f16_ # rnd # relu # satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_half_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_half_ty], [llvm_float_ty]>;
}
}
}
@@ -1719,78 +1725,78 @@ let TargetPrefix = "nvvm" in {
foreach relu = ["", "_relu"] in {
foreach satfinite = ["", "_satfinite"] in {
def int_nvvm_ff2f16x2_rs # relu # satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]>;
def int_nvvm_ff2bf16x2_rs # relu # satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]>;
}
}
foreach satfinite = ["", "_satfinite"] in {
def int_nvvm_f2tf32_rna # satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
foreach rnd = ["rn", "rz"] in
foreach relu = ["", "_relu"] in
def int_nvvm_f2tf32_ # rnd # relu # satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
}
foreach type = ["e4m3x2", "e5m2x2"] in {
foreach relu = ["", "_relu"] in {
def int_nvvm_ff_to_ # type # _rn # relu : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
def int_nvvm_f16x2_to_ # type # _rn # relu : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>;
def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin,
- PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
+ NVVMPureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
}
}
-
+
// RS rounding mode (Stochastic Rounding) conversions for f8x4 types
// The last i32 operand provides the random bits for the conversion
foreach type = ["e4m3x4", "e5m2x4"] in {
foreach relu = ["", "_relu"] in {
def int_nvvm_f32x4_to_ # type # _rs # relu # _satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_v4i8_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_v4i8_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
}
}
// FP4 conversions.
foreach relu = ["", "_relu"] in {
def int_nvvm_ff_to_e2m1x2_rn # relu # _satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
def int_nvvm_e2m1x2_to_f16x2_rn # relu : NVVMBuiltin,
- PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
+ NVVMPureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
}
-
+
// RS rounding mode (Stochastic Rounding) conversions for f4x4 type
// The last i32 operand provides the random bits for the conversion
foreach relu = ["", "_relu"] in {
def int_nvvm_f32x4_to_e2m1x4_rs # relu # _satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
}
// FP6 conversions.
foreach type = ["e2m3x2", "e3m2x2"] in {
foreach relu = ["", "_relu"] in {
def int_nvvm_ff_to_ # type # _rn # relu # _satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin,
- PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
+ NVVMPureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
}
}
-
+
// RS rounding mode (Stochastic Rounding) conversions for f6x4 types
// The last i32 operand provides the random bits for the conversion
foreach type = ["e2m3x4", "e3m2x4"] in {
foreach relu = ["", "_relu"] in {
def int_nvvm_f32x4_to_ # type # _rs # relu # _satfinite : NVVMBuiltin,
- PureIntrinsic<[llvm_v4i8_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
+ NVVMPureIntrinsic<[llvm_v4i8_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
}
}
@@ -1799,16 +1805,16 @@ let TargetPrefix = "nvvm" in {
foreach satmode = ["", "_satfinite"] in {
defvar suffix = rmode # satmode;
def int_nvvm_ff_to_ue8m0x2 # suffix : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>;
def int_nvvm_bf16x2_to_ue8m0x2 # suffix : NVVMBuiltin,
- PureIntrinsic<[llvm_i16_ty], [llvm_v2bf16_ty]>;
+ NVVMPureIntrinsic<[llvm_i16_ty], [llvm_v2bf16_ty]>;
}
}
def int_nvvm_ue8m0x2_to_bf16x2 : NVVMBuiltin,
- PureIntrinsic<[llvm_v2bf16_ty], [llvm_i16_ty]>;
+ NVVMPureIntrinsic<[llvm_v2bf16_ty], [llvm_i16_ty]>;
//
// Atomic operations
@@ -2121,7 +2127,7 @@ let IntrProperties = [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillRetur
// space when lowered during ISel.
//
def int_nvvm_internal_addrspace_wrap :
- PureIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
+ NVVMPureIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
[NoUndef<ArgIndex<0>>, NoUndef<RetIndex>]>;
// Move intrinsics, used in nvvm internally
@@ -2137,20 +2143,20 @@ let IntrProperties = [IntrNoMem] in {
// For getting the handle from a texture or surface variable
def int_nvvm_texsurf_handle
- : PureIntrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_anyptr_ty]>;
+ : NVVMPureIntrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_anyptr_ty]>;
def int_nvvm_texsurf_handle_internal
- : PureIntrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
+ : NVVMPureIntrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
/// Error / Warn
def int_nvvm_compiler_error : Intrinsic<[], [llvm_anyptr_ty]>;
def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty]>;
-def int_nvvm_reflect : NVVMBuiltin, PureIntrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+def int_nvvm_reflect : NVVMBuiltin, NVVMPureIntrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
// isspacep.{const, global, local, shared}
foreach space = ["const", "global", "local", "shared", "shared_cluster"] in
def int_nvvm_isspacep_ # space : NVVMBuiltin,
- PureIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [NoCapture<ArgIndex<0>>]>;
+ NVVMPureIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [NoCapture<ArgIndex<0>>]>;
//
// Texture Fetch
@@ -2304,7 +2310,7 @@ foreach vec = [TV_I8, TV_I16, TV_I32,
// Accessing special registers.
//
class PTXReadSRegIntrinsicNB_r32<list<IntrinsicProperty> properties = [], string name = "">
- : PureIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>] # properties, name>;
+ : NVVMPureIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>] # properties, name>;
class PTXReadSRegIntrinsic_r32<list<IntrinsicProperty> properties = []>
: PTXReadSRegIntrinsicNB_r32<properties>, NVVMBuiltin;
@@ -2758,7 +2764,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>] in {
}
def int_nvvm_is_explicit_cluster
- : PureIntrinsic<[llvm_i1_ty], [], [NoUndef<RetIndex>],
+ : NVVMPureIntrinsic<[llvm_i1_ty], [], [NoUndef<RetIndex>],
"llvm.nvvm.is_explicit_cluster">;
// Setmaxnreg inc/dec intrinsics
@@ -2926,7 +2932,7 @@ let IntrProperties = [IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<
}
def int_nvvm_prefetch_tensormap : DefaultAttrsIntrinsic<[], [llvm_anyptr_ty]>;
-
+
foreach eviction_priority = ["evict_normal", "evict_last"] in
def int_nvvm_prefetch_global_L2_ # eviction_priority : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty]>;
@@ -3174,12 +3180,12 @@ def int_nvvm_clusterlaunchcontrol_try_cancel_async_multicast_shared
// clusterlaunchcontrol.query_cancel.is_canceled
def int_nvvm_clusterlaunchcontrol_query_cancel_is_canceled
- : PureIntrinsic<[llvm_i1_ty], [llvm_i128_ty], [],
+ : NVVMPureIntrinsic<[llvm_i1_ty], [llvm_i128_ty], [],
"llvm.nvvm.clusterlaunchcontrol.query_cancel.is_canceled">;
foreach dim = ["x", "y", "z"] in
def int_nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_ # dim
- : PureIntrinsic<[llvm_i32_ty], [llvm_i128_ty], [],
+ : NVVMPureIntrinsic<[llvm_i32_ty], [llvm_i128_ty], [],
"llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid." # dim>;
//
@@ -3203,9 +3209,9 @@ foreach sp = [0, 1] in {
defvar scale_d_imm_range = [ImmArg<scale_d_imm>, Range<scale_d_imm, 0, 16>];
// Check if this is the specific llvm.nvvm.tcgen05.mma.tensor intrinsic.
- defvar is_target_intrinsic = !and(!eq(sp, 0),
- !eq(space, "tensor"),
- !eq(scale_d, 0),
+ defvar is_target_intrinsic = !and(!eq(sp, 0),
+ !eq(space, "tensor"),
+ !eq(scale_d, 0),
!eq(ashift, 0));
defvar base_properties = !listconcat(
@@ -3218,7 +3224,7 @@ foreach sp = [0, 1] in {
]
);
- defvar intrinsic_properties = !if(is_target_intrinsic,
+ defvar intrinsic_properties = !if(is_target_intrinsic,
!listconcat(base_properties,
[ArgInfo<ArgIndex<nargs>, [ArgName<"kind">, ImmArgPrinter<"printTcgen05MMAKind">]>,
ArgInfo<ArgIndex<!add(nargs, 1)>, [ArgName<"cta_group">]>,
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
index f730199e474f3..4db668e05cb21 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
@@ -1031,7 +1031,7 @@ attributes #1 = { "amdgpu-waves-per-eu"="1,1" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="2" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="4" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="6" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="5" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
index 2776b9187724c..4e53df3924985 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
@@ -7,12 +7,14 @@
@lds_3 = external addrspace(3) global [0 x i8], align 4
@lds_4 = external addrspace(3) global [0 x i8], align 8
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
;.
define void @use_variables() sanitize_address {
; CHECK-LABEL: define void @use_variables(
@@ -248,7 +250,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR7]] = { nomerge }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll
index f33b30119754f..32601422c7e67 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP38:%.*]] = and i1 [[TMP34]], [[TMP37]]
; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP38]])
; CHECK-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP39]], 0
-; CHECK-NEXT: br i1 [[TMP40]], label [[ASAN_REPORT:%.*]], label [[TMP43:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP40]], label [[ASAN_REPORT:%.*]], label [[TMP43:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP38]], label [[TMP41:%.*]], label [[CONDFREE:%.*]]
; CHECK: 41:
@@ -103,11 +103,12 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR6]] = { nomerge }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1]] = !{i32 8, i32 9}
-; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll
index 40b1305a3b12c..bad2d8e0fb5f4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll
@@ -8,12 +8,14 @@
@lds_3 = external addrspace(3) global [0 x i8], align 4
@lds_4 = external addrspace(3) global [0 x i8], align 8
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
;.
define void @use_variables() sanitize_address {
; CHECK-LABEL: define void @use_variables(
@@ -249,7 +251,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR7]] = { nomerge }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll
index f2cdc4c812db1..c5985e5cc4df8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll
@@ -88,7 +88,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP53:%.*]] = and i1 [[TMP49]], [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP53]])
; CHECK-NEXT: [[TMP55:%.*]] = icmp ne i64 [[TMP54]], 0
-; CHECK-NEXT: br i1 [[TMP55]], label [[ASAN_REPORT:%.*]], label [[TMP58:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP55]], label [[ASAN_REPORT:%.*]], label [[TMP58:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP53]], label [[TMP56:%.*]], label [[CONDFREE:%.*]]
; CHECK: 56:
@@ -114,7 +114,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP71:%.*]] = and i1 [[TMP66]], [[TMP70]]
; CHECK-NEXT: [[TMP72:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP71]])
; CHECK-NEXT: [[TMP73:%.*]] = icmp ne i64 [[TMP72]], 0
-; CHECK-NEXT: br i1 [[TMP73]], label [[ASAN_REPORT1:%.*]], label [[TMP76:%.*]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP73]], label [[ASAN_REPORT1:%.*]], label [[TMP76:%.*]], !prof [[PROF3]]
; CHECK: asan.report1:
; CHECK-NEXT: br i1 [[TMP71]], label [[TMP74:%.*]], label [[TMP75:%.*]]
; CHECK: 74:
@@ -139,7 +139,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]]
; CHECK-NEXT: [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]])
; CHECK-NEXT: [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0
-; CHECK-NEXT: br i1 [[TMP90]], label [[ASAN_REPORT2:%.*]], label [[TMP93:%.*]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP90]], label [[ASAN_REPORT2:%.*]], label [[TMP93:%.*]], !prof [[PROF3]]
; CHECK: asan.report2:
; CHECK-NEXT: br i1 [[TMP88]], label [[TMP91:%.*]], label [[TMP92:%.*]]
; CHECK: 91:
@@ -164,7 +164,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP105:%.*]] = and i1 [[TMP101]], [[TMP104]]
; CHECK-NEXT: [[TMP106:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP105]])
; CHECK-NEXT: [[TMP107:%.*]] = icmp ne i64 [[TMP106]], 0
-; CHECK-NEXT: br i1 [[TMP107]], label [[ASAN_REPORT3:%.*]], label [[TMP110:%.*]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP107]], label [[ASAN_REPORT3:%.*]], label [[TMP110:%.*]], !prof [[PROF3]]
; CHECK: asan.report3:
; CHECK-NEXT: br i1 [[TMP105]], label [[TMP108:%.*]], label [[TMP109:%.*]]
; CHECK: 108:
@@ -203,11 +203,12 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR6]] = { nomerge }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1]] = !{i32 8, i32 9}
-; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
index b9b4c90daea87..fa888a35cb8ba 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
@@ -7,11 +7,13 @@
@lds_3 = external addrspace(3) global [3 x i8], align 4
@lds_4 = external addrspace(3) global [4 x i8], align 8
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] {{\[}}[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
;.
define void @use_variables() sanitize_address {
; CHECK-LABEL: define void @use_variables(
@@ -217,7 +219,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR7]] = { nomerge }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll
index a70db2259cc3f..a521d9d9d436b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll
@@ -6,10 +6,11 @@
@lds_var = internal addrspace(3) global [1024 x i32] poison, align 4
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address
;.
; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 32, i32 4096, i32 5120 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address
;.
define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
; CHECK-LABEL: define void @my_function(
@@ -33,7 +34,7 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[TMP12]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP17]])
; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0
-; CHECK-NEXT: br i1 [[TMP19]], label [[ASAN_REPORT:%.*]], label [[TMP22:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP19]], label [[ASAN_REPORT:%.*]], label [[TMP22:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP17]], label [[TMP20:%.*]], label [[TMP21:%.*]]
; CHECK: 20:
@@ -60,7 +61,7 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
; CHECK-NEXT: [[TMP36:%.*]] = and i1 [[TMP31]], [[TMP35]]
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP36]])
; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i64 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[TMP38]], label [[ASAN_REPORT1:%.*]], label [[TMP41:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[TMP38]], label [[ASAN_REPORT1:%.*]], label [[TMP41:%.*]], !prof [[PROF2]]
; CHECK: asan.report1:
; CHECK-NEXT: br i1 [[TMP36]], label [[TMP39:%.*]], label [[TMP40:%.*]]
; CHECK: 39:
@@ -81,7 +82,7 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
define amdgpu_kernel void @my_kernel() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @my_kernel(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
; CHECK-NEXT: WId:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
@@ -142,11 +143,12 @@ define amdgpu_kernel void @my_kernel() sanitize_address {
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR7]] = { nomerge }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-; CHECK: [[META2]] = !{i32 0}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META3]] = !{i32 0}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
index 73ffcdd783ded..ab3300ea659b8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
@@ -150,7 +150,7 @@ attributes #1 = { "amdgpu-no-heap-ptr" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR6]] = { nomerge }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll
index 301bda7e0086e..c7550dd9576ec 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll
@@ -149,7 +149,7 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR6]] = { nomerge }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll
index 02a241f947748..15b074c2d9c11 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll
@@ -122,7 +122,7 @@ define amdgpu_kernel void @atomic_xchg_kernel(ptr addrspace(1) %out, [8 x i32],
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR6]] = { nomerge }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll
index b87b3fd824dd3..1b3664bf1e4e7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll
@@ -204,7 +204,7 @@ define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_add
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR6]] = { nomerge }
;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll
index 60ef1079624d7..218a0dfa9b060 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-pass-second-run.ll
@@ -8,9 +8,11 @@ target triple = "x86_64-unknown-linux-gnu"
; Function with sanitize_address is instrumented.
; Function Attrs: nounwind uwtable
;.
+; CHECK: @llvm.used = appending global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
; CHECK: @___asan_globals_registered = common hidden global i64 0
; CHECK: @__start_asan_globals = extern_weak hidden global i64
; CHECK: @__stop_asan_globals = extern_weak hidden global i64
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
;.
define void @instr_sa(ptr %a) sanitize_address {
; CHECK: Function Attrs: sanitize_address
@@ -31,7 +33,7 @@ define void @instr_sa(ptr %a) sanitize_address {
; CHECK-NEXT: [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB12]]
; CHECK: [[BB11]]:
-; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: [[BB12]]:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
@@ -47,8 +49,9 @@ entry:
}
;.
; CHECK: attributes #[[ATTR0]] = { sanitize_address }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR2]] = { nomerge }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR3]] = { nomerge }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
diff --git a/llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll b/llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll
index d408f949db824..64e1588d043ae 100644
--- a/llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll
+++ b/llvm/test/tools/llvm-reduce/remove-attributes-convergent-uncontrolled.ll
@@ -24,8 +24,8 @@ declare float @convergent.extern.func(float, float) #0
declare float @extern.func(float, float)
declare float @llvm.amdgcn.readfirstlane.f32(float) #1
-; RESULT: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(none) }
+; RESULT: attributes #0 = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
; RESULT-NOT: attributes
attributes #0 = { convergent nounwind }
-attributes #1 = { convergent nocallback nofree nounwind willreturn memory(none) }
+attributes #1 = { convergent nocallback nocreateundeforpoison nofree nounwind willreturn memory(none) }
More information about the llvm-commits
mailing list