[clang] [AMDGPU] Programmatically port old `.def` clang builtins to `.td` (PR #175873)

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 14 10:14:26 PST 2026


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/175873

>From 1b53f544eac854865a691a5ffa95632a86c85eff Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Tue, 13 Jan 2026 18:38:32 -0600
Subject: [PATCH] [AMDGPU] Programmatically port old `.def` clang builtins to
 `.td`

Summary:
This PR ports the old `.def` builtins to the new Tablegen interface.
This required a few changes in the handler; namely, `AS(0)` currently has
a real meaning, not just in SPIR-V but wherever the type parser expects
it. The conversion here should be 1-to-1.

Some more work could be done to reduce the amount of repetition by
grouping all the instructions together; I'll leave that open, depending
on whether or not anyone cares.

Trim redundant signs
---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 1018 ------------------
 clang/include/clang/Basic/BuiltinsAMDGPU.td  | 1017 +++++++++++++++++
 clang/include/clang/Basic/CMakeLists.txt     |    4 +
 clang/include/clang/Basic/TargetBuiltins.h   |    5 +-
 clang/include/module.modulemap               |    1 -
 clang/lib/Basic/Targets/AMDGPU.cpp           |   22 +-
 6 files changed, 1034 insertions(+), 1033 deletions(-)
 delete mode 100644 clang/include/clang/Basic/BuiltinsAMDGPU.def
 create mode 100644 clang/include/clang/Basic/BuiltinsAMDGPU.td

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
deleted file mode 100644
index c443be3a252a9..0000000000000
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ /dev/null
@@ -1,1018 +0,0 @@
-//==- BuiltinsAMDGPU.def - AMDGPU Builtin function database ------*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the AMDGPU-specific builtin function database. Users of
-// this file must define the BUILTIN macro to make use of this information.
-//
-// Note: (unsigned) long int type should be avoided in builtin definitions
-// since it has different size on Linux (64 bit) and Windows (32 bit).
-// (unsigned) long long int type should also be avoided, which is 64 bit for
-// C/C++/HIP but is 128 bit for OpenCL. Use `W` as width modifier in builtin
-// definitions since it is fixed for 64 bit.
-//===----------------------------------------------------------------------===//
-
-// The format of this database matches clang/Basic/Builtins.def.
-
-#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
-#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-//===----------------------------------------------------------------------===//
-// SI+ only builtins.
-//===----------------------------------------------------------------------===//
-
-BUILTIN(__builtin_amdgcn_dispatch_ptr, "v*4", "nc")
-BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "v*4", "nc")
-BUILTIN(__builtin_amdgcn_implicitarg_ptr, "v*4", "nc")
-BUILTIN(__builtin_amdgcn_queue_ptr, "v*4", "nc")
-
-BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc")
-BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc")
-BUILTIN(__builtin_amdgcn_workgroup_id_z, "Ui", "nc")
-
-TARGET_BUILTIN(__builtin_amdgcn_cluster_id_x, "Ui", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_id_y, "Ui", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_id_z, "Ui", "nc", "gfx1250-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_id_x, "Ui", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_id_y, "Ui", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_id_z, "Ui", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_flat_id, "Ui", "nc", "gfx1250-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_max_id_x, "Ui", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_max_id_y, "Ui", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_max_id_z, "Ui", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_workgroup_max_flat_id, "Ui", "nc", "gfx1250-insts")
-
-BUILTIN(__builtin_amdgcn_workitem_id_x, "Ui", "nc")
-BUILTIN(__builtin_amdgcn_workitem_id_y, "Ui", "nc")
-BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc")
-
-BUILTIN(__builtin_amdgcn_workgroup_size_x, "Us", "nc")
-BUILTIN(__builtin_amdgcn_workgroup_size_y, "Us", "nc")
-BUILTIN(__builtin_amdgcn_workgroup_size_z, "Us", "nc")
-
-BUILTIN(__builtin_amdgcn_grid_size_x, "Ui", "nc")
-BUILTIN(__builtin_amdgcn_grid_size_y, "Ui", "nc")
-BUILTIN(__builtin_amdgcn_grid_size_z, "Ui", "nc")
-
-BUILTIN(__builtin_amdgcn_mbcnt_hi, "UiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
-
-TARGET_BUILTIN(__builtin_amdgcn_s_memtime, "WUi", "n", "s-memtime-inst")
-
-//===----------------------------------------------------------------------===//
-// Instruction builtins.
-//===----------------------------------------------------------------------===//
-BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
-BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
-BUILTIN(__builtin_amdgcn_s_getpc, "WUi", "n")
-BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
-BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")
-BUILTIN(__builtin_amdgcn_s_sendmsghalt, "vIiUi", "n")
-BUILTIN(__builtin_amdgcn_s_barrier, "v", "n")
-BUILTIN(__builtin_amdgcn_s_ttracedata, "vi", "n")
-BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n")
-BUILTIN(__builtin_amdgcn_sched_barrier, "vIi", "n")
-BUILTIN(__builtin_amdgcn_sched_group_barrier, "vIiIiIi", "n")
-BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
-BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
-BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
-BUILTIN(__builtin_amdgcn_fence, "vUicC*.", "n")
-BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n")
-BUILTIN(__builtin_amdgcn_wavefrontsize, "Ui", "nc")
-
-BUILTIN(__builtin_amdgcn_atomic_inc32, "UZiUZiD*UZiUicC*", "n")
-BUILTIN(__builtin_amdgcn_atomic_inc64, "UWiUWiD*UWiUicC*", "n")
-
-BUILTIN(__builtin_amdgcn_atomic_dec32, "UZiUZiD*UZiUicC*", "n")
-BUILTIN(__builtin_amdgcn_atomic_dec64, "UWiUWiD*UWiUicC*", "n")
-
-// FIXME: Need to disallow constant address space.
-BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
-BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
-BUILTIN(__builtin_amdgcn_div_fmas, "ddddb", "nc")
-BUILTIN(__builtin_amdgcn_div_fmasf, "ffffb", "nc")
-BUILTIN(__builtin_amdgcn_div_fixup, "dddd", "nc")
-BUILTIN(__builtin_amdgcn_div_fixupf, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_trig_preop, "ddi", "nc")
-BUILTIN(__builtin_amdgcn_trig_preopf, "ffi", "nc")
-BUILTIN(__builtin_amdgcn_rcp, "dd", "nc")
-BUILTIN(__builtin_amdgcn_rcpf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_sqrt, "dd", "nc")
-BUILTIN(__builtin_amdgcn_sqrtf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_rsq, "dd", "nc")
-BUILTIN(__builtin_amdgcn_rsqf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_rsq_clamp, "dd", "nc")
-BUILTIN(__builtin_amdgcn_rsq_clampf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_sinf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_cosf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_logf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_exp2f, "ff", "nc")
-BUILTIN(__builtin_amdgcn_log_clampf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_ldexp, "ddi", "nc")
-BUILTIN(__builtin_amdgcn_ldexpf, "ffi", "nc")
-BUILTIN(__builtin_amdgcn_frexp_mant, "dd", "nc")
-BUILTIN(__builtin_amdgcn_frexp_mantf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc")
-BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc")
-BUILTIN(__builtin_amdgcn_fract, "dd", "nc")
-BUILTIN(__builtin_amdgcn_fractf, "ff", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "lerp-inst")
-BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
-BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "cube-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "cube-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "cube-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "cube-insts")
-BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
-BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
-BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
-BUILTIN(__builtin_amdgcn_s_setprio, "vIs", "n")
-BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc")
-BUILTIN(__builtin_amdgcn_ds_permute, "iii", "nc")
-BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc")
-BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
-BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
-BUILTIN(__builtin_amdgcn_fmed3f, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n")
-BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n")
-BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n")
-BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n")
-BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n")
-BUILTIN(__builtin_amdgcn_alignbit, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "cvt-pknorm-vop2-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "cvt-pknorm-vop2-insts")
-BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
-BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
-BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
-BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
-BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "sad-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "sad-insts")
-TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "qsad-insts")
-BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
-BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
-
-BUILTIN(__builtin_amdgcn_make_buffer_rsrc, "Qbv*sWii", "nc")
-BUILTIN(__builtin_amdgcn_raw_buffer_store_b8, "vUcQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_store_b16, "vUsQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "vUiQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_store_b64, "vV2UiQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_store_b96, "vV3UiQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_store_b128, "vV4UiQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_load_b8, "UcQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_load_b16, "UsQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_load_b32, "UiQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
-BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
-
-BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "")
-
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "", "atomic-fadd-rtn-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2xV2xQbiiIi", "", "atomic-buffer-global-pk-add-f16-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32, "ffQbiiIi", "", "atomic-fmin-fmax-global-f32")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "", "atomic-fmin-fmax-global-f32")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64, "ddQbiiIi", "", "atomic-fmin-fmax-global-f64")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "", "atomic-fmin-fmax-global-f64")
-
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "", "vmem-to-lds-load-insts")
-TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "", "vmem-to-lds-load-insts")
-
-//===----------------------------------------------------------------------===//
-// Ballot builtins.
-//===----------------------------------------------------------------------===//
-
-TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "ZUib", "nc", "wavefrontsize32")
-BUILTIN(__builtin_amdgcn_ballot_w64, "WUib", "nc")
-
-TARGET_BUILTIN(__builtin_amdgcn_inverse_ballot_w32, "bZUi", "nc", "wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_inverse_ballot_w64, "bWUi", "nc", "wavefrontsize64")
-
-// Deprecated intrinsics in favor of __builtin_amdgn_ballot_{w32|w64}
-BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
-BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmp, "WUiiiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmpl, "WUiWiWiIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc")
-
-//===----------------------------------------------------------------------===//
-// Flat addressing builtins.
-//===----------------------------------------------------------------------===//
-BUILTIN(__builtin_amdgcn_is_shared, "bvC*0", "nc")
-BUILTIN(__builtin_amdgcn_is_private, "bvC*0", "nc")
-
-//===----------------------------------------------------------------------===//
-// GWS builtins.
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n", "gws")
-TARGET_BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n", "gws")
-TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n", "gws")
-TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n", "gws")
-TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n", "gws")
-
-//===----------------------------------------------------------------------===//
-// CI+ only builtins.
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts")
-TARGET_BUILTIN(__builtin_amdgcn_buffer_wbinvl1_vol, "v", "n", "ci-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_release_all, "vUi", "n", "ci-insts")
-
-//===----------------------------------------------------------------------===//
-// Interpolation builtins.
-//===----------------------------------------------------------------------===//
-BUILTIN(__builtin_amdgcn_interp_p1_f16, "ffUiUibUi", "nc")
-BUILTIN(__builtin_amdgcn_interp_p2_f16, "hffUiUibUi", "nc")
-BUILTIN(__builtin_amdgcn_interp_p1, "ffUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_interp_p2, "fffUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_interp_mov, "fUiUiUiUi", "nc")
-
-//===----------------------------------------------------------------------===//
-// VI+ only builtins.
-//===----------------------------------------------------------------------===//
-
-TARGET_BUILTIN(__builtin_amdgcn_div_fixuph, "hhhh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_rcph, "hh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sqrth, "hh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_rsqh, "hh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sinh, "hh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cosh, "hh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ldexph, "hhi", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_frexp_manth, "hh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "WUi", "n", "s-memrealtime")
-TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nct", "dpp")
-TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nct", "dpp")
-TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")
-
-//===----------------------------------------------------------------------===//
-// GFX9+ only builtins.
-//===----------------------------------------------------------------------===//
-
-TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "", "atomic-fadd-rtn-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2xV2x*1V2x", "t", "atomic-buffer-global-pk-add-f16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "", "gfx90a-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f64, "dd*0d", "", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmin_f64, "dd*0d", "", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "", "gfx90a-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "", "gfx8-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "", "gfx940-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2xV2x*0V2x", "t", "atomic-flat-pk-add-16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst")
-TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2xV2x*3V2x", "t", "atomic-ds-pk-add-16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_load_to_lds, "vv*v*3IUiIiIUi", "", "vmem-to-lds-load-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "", "vmem-to-lds-load-insts")
-
-//===----------------------------------------------------------------------===//
-// Deep learning builtins.
-//===----------------------------------------------------------------------===//
-
-TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2xV2xfIb", "nc", "dot10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_fdot2_f16_f16, "xV2xV2xx", "nc", "dot9-insts")
-TARGET_BUILTIN(__builtin_amdgcn_fdot2_bf16_bf16, "sV2sV2ss", "nc", "dot9-insts")
-TARGET_BUILTIN(__builtin_amdgcn_fdot2_f32_bf16, "fV2sV2sfIb", "nc", "dot12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dot2-insts")
-TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dot2-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dot1-insts")
-TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot7-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sudot4, "iIbiIbiiIb", "nc", "dot8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts")
-TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot7-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sudot8, "iIbiIbiiIb", "nc", "dot8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_bf8, "fUiUif", "nc", "dot11-insts")
-TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_fp8, "fUiUif", "nc", "dot11-insts")
-TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_fp8, "fUiUif", "nc", "dot11-insts")
-TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "dot11-insts")
-TARGET_BUILTIN(__builtin_amdgcn_fdot2c_f32_bf16, "fV2yV2yfIb", "nc", "dot13-insts")
-
-//===----------------------------------------------------------------------===//
-// GFX10+ only builtins.
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nct", "gfx10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_ttracedata_imm, "vIs", "n", "gfx10-insts")
-
-//===----------------------------------------------------------------------===//
-// Raytracing builtins.
-// By default the 1st argument is i32 and the 4/5-th arguments are float4.
-// Postfix l indicates the 1st argument is i64.
-// Postfix h indicates the 4/5-th arguments are half4.
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray, "V4UiUifV4fV4fV4fV4Ui", "nc", "gfx10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_h, "V4UiUifV4fV4xV4xV4Ui", "nc", "gfx10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_l, "V4UiWUifV4fV4fV4fV4Ui", "nc", "gfx10-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_lh, "V4UiWUifV4fV4xV4xV4Ui", "nc", "gfx10-insts")
-
-
-//===----------------------------------------------------------------------===//
-// GFX11+ only builtins.
-//===----------------------------------------------------------------------===//
-
-// TODO: This is a no-op in wave32. Should the builtin require wavefrontsize64?
-TARGET_BUILTIN(__builtin_amdgcn_permlane64, "UiUi", "nc", "gfx11-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_wait_event_export_ready, "v", "n", "gfx11-insts")
-
-//===----------------------------------------------------------------------===//
-// WMMA builtins.
-// Postfix w32 indicates the builtin requires wavefront size of 32.
-// Postfix w64 indicates the builtin requires wavefront size of 64.
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w32, "V8fV16xV16xV8f", "nc", "gfx11-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32, "V8fV16sV16sV8f", "nc", "gfx11-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w32, "V16xV16xV16xV16xIb", "nc", "gfx11-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32, "V16sV16sV16sV16sIb", "nc", "gfx11-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32, "V16xV16xV16xV16xIb", "nc", "gfx11-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32, "V16sV16sV16sV16sIb", "nc", "gfx11-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32, "V8iIbV4iIbV4iV8iIb", "nc", "gfx11-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32, "V8iIbV2iIbV2iV8iIb", "nc", "gfx11-insts,wavefrontsize32")
-
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w64, "V4fV16xV16xV4f", "nc", "gfx11-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64, "V4fV16sV16sV4f", "nc", "gfx11-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w64, "V8xV16xV16xV8xIb", "nc", "gfx11-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64, "V8sV16sV16sV8sIb", "nc", "gfx11-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64, "V8xV16xV16xV8xIb", "nc", "gfx11-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64, "V8sV16sV16sV8sIb", "nc", "gfx11-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64, "V4iIbV4iIbV4iV4iIb", "nc", "gfx11-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64, "V4iIbV2iIbV2iV4iIb", "nc", "gfx11-insts,wavefrontsize64")
-
-TARGET_BUILTIN(__builtin_amdgcn_s_sendmsg_rtn, "UiUIi", "n", "gfx11-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_sendmsg_rtnl, "UWiUIi", "n", "gfx11-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_ds_bvh_stack_rtn, "V2UiUiUiV4UiIi", "n", "gfx11-insts")
-
-//===----------------------------------------------------------------------===//
-// Special builtins.
-//===----------------------------------------------------------------------===//
-BUILTIN(__builtin_amdgcn_read_exec, "WUi", "nc")
-BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc")
-BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc")
-
-BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
-
-BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
-BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
-
-//===----------------------------------------------------------------------===//
-
-// Wave Reduction builtins.
-
-//===----------------------------------------------------------------------===//
-
-BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_fadd_f32, "ffZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_fsub_f32, "ffZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_fmin_f32, "ffZi", "nc")
-BUILTIN(__builtin_amdgcn_wave_reduce_fmax_f32, "ffZi", "nc")
-
-//===----------------------------------------------------------------------===//
-// R600-NI only builtins.
-//===----------------------------------------------------------------------===//
-
-BUILTIN(__builtin_r600_implicitarg_ptr, "Uc*7", "nc")
-
-BUILTIN(__builtin_r600_read_tgid_x, "Ui", "nc")
-BUILTIN(__builtin_r600_read_tgid_y, "Ui", "nc")
-BUILTIN(__builtin_r600_read_tgid_z, "Ui", "nc")
-
-BUILTIN(__builtin_r600_read_tidig_x, "Ui", "nc")
-BUILTIN(__builtin_r600_read_tidig_y, "Ui", "nc")
-BUILTIN(__builtin_r600_read_tidig_z, "Ui", "nc")
-
-BUILTIN(__builtin_r600_recipsqrt_ieee, "dd", "nc")
-BUILTIN(__builtin_r600_recipsqrt_ieeef, "ff", "nc")
-
-//===----------------------------------------------------------------------===//
-// MFMA builtins.
-//===----------------------------------------------------------------------===//
-
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x1f32, "V32fffV32fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x1f32, "V16fffV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x1f32, "V4fffV4fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2f32, "V16fffV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f32, "V4fffV4fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4f16, "V32fV4xV4xV32fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f16, "V16fV4xV4xV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x4f16, "V4fV4xV4xV4fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x8f16, "V16fV4xV4xV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x16f16, "V4fV4xV4xV4fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x4i8, "V32iiiV32iIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x4i8, "V16iiiV16iIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_4x4x4i8, "V4iiiV4iIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x8i8, "V16iiiV16iIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x16i8, "V4iiiV4iIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2bf16, "V32fV2sV2sV32fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x2bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x2bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x8bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4bf16_1k, "V32fV4sV4sV32fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4bf16_1k, "V16fV4sV4sV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x4bf16_1k, "V4fV4sV4sV4fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x8bf16_1k, "V16fV4sV4sV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x16bf16_1k, "V4fV4sV4sV4fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f64_16x16x4f64, "V4dddV4dIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f64_4x4x4f64, "ddddIiIiIi", "nc", "mai-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x32_i8, "V4iWiWiV4iIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x16_i8, "V16iWiWiV16iIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x8_xf32, "V4fV2fV2fV4fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4_xf32, "V16fV2fV2fV16fIiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8, "V4fWiWiV4fIiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8, "V4fWiWiV4fIiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8, "V4fWiWiV4fIiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8, "V4fWiWiV4fIiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8, "V16fWiWiV16fIiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8, "V16fWiWiV16fIiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8, "V16fWiWiV16fIiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8, "V16fWiWiV16fIiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x32_f16, "V4fV4xV8xV4fiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x16_f16, "V16fV4xV8xV16fiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x32_bf16, "V4fV4sV8sV4fiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x16_bf16, "V16fV4sV8sV16fiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x64_i8, "V4iV2iV4iV4iiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x32_i8, "V16iV2iV4iV16iiIiIi", "nc", "mai-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8, "V4fV2iV4iV4fiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8, "V4fV2iV4iV4fiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8, "V4fV2iV4iV4fiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8, "V4fV2iV4iV4fiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8, "V16fV2iV4iV16fiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8, "V16fV2iV4iV16fiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8, "V16fV2iV4iV16fiIiIi", "nc", "fp8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8, "V16fV2iV4iV16fiIiIi", "nc", "fp8-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-conversion-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-conversion-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8_e5m3, "fiIi", "nc", "fp8e5m3-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-conversion-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-conversion-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-conversion-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-conversion-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-conversion-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion-insts")
-
-//===----------------------------------------------------------------------===//
-// GFX950 only builtins.
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4, "V4fV8ZiV8ZiV4fIiIiIiiIii", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4, "V16fV8ZiV8ZiV16fIiIiIiiIii", "nc", "gfx950-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8xV8xV4fIiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_bf16, "V4fV8yV8yV4fIiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_f16, "V16fV8xV8xV16fIiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x16_bf16, "V16fV8yV8yV16fIiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x64_i8, "V4iV4iV4iV4iIiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x32_i8, "V16iV4iV4iV16iIiIiIi", "nc", "gfx950-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_f16, "V4fV8xV16xV4fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_f16, "V16fV8xV16xV16fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x64_bf16, "V4fV8yV16yV4fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_bf16, "V16fV8yV16yV16fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x128_i8, "V4iV4iV8iV4iiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x64_i8, "V16iV4iV8iV16iiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_bf8_bf8, "V4fV4iV8iV4fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_bf8_fp8, "V4fV4iV8iV4fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_bf8, "V4fV4iV8iV4fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_permlane16_swap, "V2UiUiUiIbIb", "nc", "permlane16-swap")
-TARGET_BUILTIN(__builtin_amdgcn_permlane32_swap, "V2UiUiUiIbIb", "nc", "permlane32-swap")
-
-TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr4_b64_v2i32, "V2iV2i*3", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr6_b96_v3i32, "V3iV3i*3", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr8_b64_v2i32, "V2iV2i*3", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr16_b64_v4i16, "V4sV4s*3", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr16_b64_v4f16, "V4hV4h*3", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_read_tr16_b64_v4bf16, "V4yV4y*3", "nc", "gfx950-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_ashr_pk_i8_i32, "UsUiUiUi", "nc", "ashr-pk-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ashr_pk_u8_i32, "UsUiUiUi", "nc", "ashr-pk-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_2xpk16_fp6_f32, "V6UiV16fV16ff", "nc", "gfx950-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_2xpk16_bf6_f32, "V6UiV16fV16ff", "nc", "gfx950-insts")
-
-//===----------------------------------------------------------------------===//
-// GFX12+ only builtins.
-//===----------------------------------------------------------------------===//
-
-TARGET_BUILTIN(__builtin_amdgcn_s_sleep_var, "vUi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_permlane16_var,  "UiUiUiUiIbIb", "nc", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_permlanex16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal, "vIi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vv*i", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_buffer_prefetch_data, "vQbIiUi", "nc", "gfx12-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b64_v2i32, "V2iV2i*1", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v8i16, "V8sV8s*1", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v8f16, "V8hV8h*1", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v8bf16, "V8yV8y*1", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b64_i32, "ii*1", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v4i16, "V4sV4s*1", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v4f16, "V4hV4h*1", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v4bf16, "V4yV4y*1", "nc", "gfx12-insts,wavefrontsize64")
-
-TARGET_BUILTIN(__builtin_amdgcn_ds_bpermute_fi_b32, "iii", "nc", "gfx12-insts")
-
-// For the following two builtins, the second and third return values of the
-// intrinsics are returned through the last two pointer-type function arguments.
-TARGET_BUILTIN(__builtin_amdgcn_image_bvh8_intersect_ray, "V10UiWUifUcV3fV3fUiV4UiV3f*V3f*", "nc", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_bvh_dual_intersect_ray, "V10UiWUifUcV3fV3fV2UiV4UiV3f*V3f*", "nc", "gfx12-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn, "V2UiUiUiV4UiIi", "n", "gfx11-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn, "V2UiUiUiV8UiIi", "n", "gfx12-insts")
-
-// The intrinsic returns {i64, i32}, the builtin returns <2 x i64>.
-// The second return value of the intrinsic is zext'ed.
-TARGET_BUILTIN(__builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn, "V2WUiUiUiV8UiIi", "n", "gfx12-insts")
-
-//===----------------------------------------------------------------------===//
-// WMMA builtins.
-// Postfix w32 indicates the builtin requires wavefront size of 32.
-// Postfix w64 indicates the builtin requires wavefront size of 64.
-//
-// Some of these are very similar to their GFX11 counterparts, but they don't
-// require replication of the A,B matrices, so they use fewer vector elements.
-// Therefore, we add an "_gfx12" suffix to distinguish them from the existing
-// builtins.
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12, "V8fV8xV8xV8f", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12, "V8fV8sV8sV8f", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12, "V8xV8xV8xV8x", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12, "V8sV8sV8sV8s", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12, "V8iIbV2iIbV2iV8iIb", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12, "V8iIbiIbiV8iIb", "nc", "gfx12-insts,wavefrontsize32")
-// These are gfx12-only, but for consistency with the other WMMA variants we're
-// keeping the "_gfx12" suffix.
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12, "V8fV2iV2iV8f", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12, "V8iIbV2iIbV2iV8iIb", "nc", "gfx12-insts,wavefrontsize32")
-
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12, "V4fV4xV4xV4f", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12, "V4fV4sV4sV4f", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12, "V4xV4xV4xV4x", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12, "V4sV4sV4sV4s", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
-// These are gfx12-only, but for consistency with the other WMMA variants we're
-// keeping the "_gfx12" suffix.
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12, "V4fiiV4f", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12, "V4iIbiIbiV4iIb", "nc", "gfx12-insts,wavefrontsize64")
-
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32, "V8fV8hV16hV8fi", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32, "V8fV8sV16sV8fi", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32, "V8hV8hV16hV8hi", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32, "V8sV8sV16sV8si", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32, "V8iIbV2iIbV4iV8iiIb", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32, "V8iIbiIbV2iV8iiIb", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32, "V8iIbV2iIbV4iV8iiIb", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32, "V8fV2iV4iV8fi", "nc", "gfx12-insts,wavefrontsize32")
-
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64, "V4fV4hV8hV4fi", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64, "V4fV4sV8sV4fi", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64, "V4hV4hV8hV4hi", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64, "V4sV4sV8sV4si", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64, "V4iIbiIbV2iV4iiIb", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64, "V4iIbiIbiV4iiIb", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64, "V4iIbiIbV2iV4iiIb", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fi", "nc", "gfx12-insts,wavefrontsize64")
-
-TARGET_BUILTIN(__builtin_amdgcn_prng_b32, "UiUi", "nc", "prng-inst")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_fp6_f16, "V6UiV32xf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf6_f16, "V6UiV32xf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_fp6_bf16, "V6UiV32yf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf6_bf16, "V6UiV32yf", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_f16_fp8, "V2xV2xifIiIb", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_f16_bf8, "V2xV2xifIiIb", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_f32_fp8, "fifIi", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_f32_bf8, "fifIi", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp8_f32, "V2sV2sfffIb", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf8_f32, "V2sV2sfffIb", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f32_fp8, "V2fUifIb", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f32_bf8, "V2fUifIb", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp8_f16, "V2sV2sV2xfIb", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp8_bf16, "V2sV2sV2yfIb", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf8_f16, "V2sV2sV2xfIb", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf8_bf16, "V2sV2sV2yfIb", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f32_fp4, "V2fUifIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp4_f32, "UiUifffIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f16_fp4, "V2xUifIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf16_fp4, "V2yUifIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_f32_fp6, "V32fV6Uif", "nc", "fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_f32_bf6, "V32fV6Uif", "nc", "fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_f16_fp6, "V32xV6Uif", "nc", "fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf16_fp6, "V32yV6Uif", "nc", "fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_f16_bf6, "V32xV6Uif", "nc", "fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk32_bf16_bf6, "V32yV6Uif", "nc", "fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f16_fp8, "V2xUifIb", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf16_fp8, "V2yUifIb", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_f16_bf8, "V2xUifIb", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_bf16_bf8, "V2yUifIb", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp4_f16, "UiUiV2xfIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk_fp4_bf16, "UiUiV2yfIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f16, "UiUiV2xUifIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_bf16, "UiUiV2yUifIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32, "UiUiV2fUifIi", "nc", "fp4-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_bf8_bf16, "iiyUifIi", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_bf8_f16, "iixUifIi", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_bf8_f32, "iifUifIi", "nc", "bf8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_fp8_bf16, "iiyUifIi", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_fp8_f16, "iixUifIi", "nc", "fp8-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_fp8_f32, "iifUifIi", "nc", "fp8-cvt-scale-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f16, "V6UiV32xUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16, "V6UiV32xUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
-TARGET_BUILTIN(__builtin_amdgcn_bitop3_b32, "iiiiIUi", "nc", "bitop3-insts")
-TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2xV2xfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
-
-//===----------------------------------------------------------------------===//
-// GFX1250+ only builtins.
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_s_cluster_barrier, "v", "n", "gfx1250-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_flat_prefetch, "vvC*0Ii", "nc", "vmem-pref-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_prefetch, "vvC*1Ii", "nc", "vmem-pref-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b32, "ii*1Ii", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b64, "V2iV2i*1Ii", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b128, "V4iV4i*1Ii", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b32, "ii*0Ii", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b64, "V2iV2i*0Ii", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b128, "V4iV4i*0Ii", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_load_b32, "ii*1Iii", "nc", "mcast-load-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_load_b64, "V2iV2i*1Iii", "nc", "mcast-load-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_load_b128, "V4iV4i*1Iii", "nc", "mcast-load-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_load_async_to_lds_b8, "vc*1c*3IiIii", "nc", "mcast-load-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_load_async_to_lds_b32, "vi*1i*3IiIii", "nc", "mcast-load-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_load_async_to_lds_b64, "vV2i*1V2i*3IiIii", "nc", "mcast-load-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cluster_load_async_to_lds_b128, "vV4i*1V4i*3IiIii", "nc", "mcast-load-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_async_to_lds_b8, "vc*1c*3IiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_async_to_lds_b32, "vi*1i*3IiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_async_to_lds_b64, "vV2i*1V2i*3IiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_async_to_lds_b128, "vV4i*1V4i*3IiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_store_async_from_lds_b8, "vc*1c*3IiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_store_async_from_lds_b32, "vi*1i*3IiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_store_async_from_lds_b64, "vV2i*1V2i*3IiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_store_async_from_lds_b128, "vV4i*1V4i*3IiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64, "vLi*3", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64, "LiLi*3Li", "nc", "gfx1250-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_tensor_load_to_lds, "vV4iV8iV4iV4iIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_tensor_load_to_lds_d2, "vV4iV8iIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_tensor_store_from_lds, "vV4iV8iV4iV4iIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_tensor_store_from_lds_d2, "vV4iV8iIi", "nc", "gfx1250-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr4_b64_v2i32, "V2iV2i*1", "nc", "transpose-load-f4f6-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr8_b64_v2i32, "V2iV2i*1", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr6_b96_v3i32, "V3iV3i*1", "nc", "transpose-load-f4f6-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr16_b128_v8i16, "V8sV8s*1", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr16_b128_v8f16, "V8hV8h*1", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_tr16_b128_v8bf16, "V8yV8y*1", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr4_b64_v2i32, "V2iV2i*3", "nc", "transpose-load-f4f6-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr8_b64_v2i32, "V2iV2i*3", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr6_b96_v3i32, "V3iV3i*3", "nc", "transpose-load-f4f6-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8i16, "V8sV8s*3", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8f16, "V8hV8h*3", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8bf16, "V8yV8y*3", "nc", "gfx1250-insts,wavefrontsize32")
-
-TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
-TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep,  "vIs", "n", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "s-wakeup-barrier-inst")
-
-TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_tanhf, "ff", "nc", "tanh-insts")
-TARGET_BUILTIN(__builtin_amdgcn_tanhh, "hh", "nc", "tanh-insts")
-TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
-TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sqrt_bf16, "yy", "nc", "bf16-trans-insts")
-TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
-TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts")
-TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sin_bf16, "yy", "nc", "bf16-trans-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cos_bf16, "yy", "nc", "bf16-trans-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_bf16_f32, "V2yffi", "nc", "bf16-cvt-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_f16_f32, "V2hffi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f16_fp8, "V8hV2UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_bf16_fp8, "V8yV2UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f16_bf8, "V8hV2UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_bf16_bf8, "V8yV2UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f16_fp4, "V8hUiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_bf16_fp4, "V8yUiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_fp8, "V8fV2UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_bf8, "V8fV2UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_fp4, "V8fUiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_f16_fp6, "V16hV3UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_bf16_fp6, "V16yV3UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_f16_bf6, "V16hV3UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_bf16_bf6, "V16yV3UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_f32_fp6, "V16fV3UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_f32_bf6, "V16fV3UiUiIUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp8_bf16, "V2UiV8yf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_bf8_bf16, "V2UiV8yf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp8_f16, "V2UiV8hf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_bf8_f16, "V2UiV8hf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp8_f32, "V2UiV8ff", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_bf8_f32, "V2UiV8ff", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp4_f32, "UiV8ff", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp4_f16, "UiV8hf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp4_bf16, "UiV8yf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk16_fp6_f32, "V3UiV16ff", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk16_bf6_f32, "V3UiV16ff", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk16_fp6_f16, "V3UiV16hf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk16_bf6_f16, "V3UiV16hf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk16_fp6_bf16, "V3UiV16yf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk16_bf6_bf16, "V3UiV16yf", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_fp8_bf16, "V2UiV8yUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_bf8_bf16, "V2UiV8yUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_fp8_f16, "V2UiV8hUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_bf8_f16, "V2UiV8hUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_fp8_f32, "V2UiV8fUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_bf8_f32, "V2UiV8fUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_f32, "UiV8fUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_f16, "UiV8hUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_bf16, "UiV8yUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16, "V3UiV16yUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_f16, "V3UiV16hUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_f32, "V3UiV16fUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_bf16, "V3UiV16yUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_f16, "V3UiV16hUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_f32, "V3UiV16fUif", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32_e5m3, "iffiIb", "nc", "fp8e5m3-insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32_e5m3, "ifiiIi", "nc", "fp8e5m3-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_permlane_bcast, "iiii", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_permlane_up, "iiii", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_permlane_down, "iiii", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_permlane_xor, "iiii", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_permlane_idx_gen, "iii", "nc", "gfx1250-insts,wavefrontsize32")
-
-TARGET_BUILTIN(__builtin_amdgcn_perm_pk16_b4_u4, "V2UiUiUiV2Ui", "nc", "tensor-cvt-lut-insts")
-TARGET_BUILTIN(__builtin_amdgcn_perm_pk16_b6_u4, "V3UiUiULiV2Ui", "nc", "tensor-cvt-lut-insts")
-TARGET_BUILTIN(__builtin_amdgcn_perm_pk16_b8_u4, "V4UiULiULiV2Ui", "nc", "tensor-cvt-lut-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_add_max_i32, "iiiiIb", "nc", "add-min-max-insts")
-TARGET_BUILTIN(__builtin_amdgcn_add_max_u32, "UiUiUiUiIb", "nc", "add-min-max-insts")
-TARGET_BUILTIN(__builtin_amdgcn_add_min_i32, "iiiiIb", "nc", "add-min-max-insts")
-TARGET_BUILTIN(__builtin_amdgcn_add_min_u32, "UiUiUiUiIb", "nc", "add-min-max-insts")
-TARGET_BUILTIN(__builtin_amdgcn_pk_add_max_i16, "V2sV2sV2sV2sIb", "nc", "pk-add-min-max-insts")
-TARGET_BUILTIN(__builtin_amdgcn_pk_add_max_u16, "V2UsV2UsV2UsV2UsIb", "nc", "pk-add-min-max-insts")
-TARGET_BUILTIN(__builtin_amdgcn_pk_add_min_i16, "V2sV2sV2sV2sIb", "nc", "pk-add-min-max-insts")
-TARGET_BUILTIN(__builtin_amdgcn_pk_add_min_u16, "V2UsV2UsV2UsV2UsIb", "nc", "pk-add-min-max-insts")
-
-// GFX1250 WMMA builtins
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x4_f32, "V8fIbV2fIbV2fIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x32_bf16, "V8fIbV16yIbV16yIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x32_bf16, "V8yIbV16yIbV16yIsV8yIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16, "V8yIbV16yIbV16yIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb.", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4, "V8fIiV16iIiV16iIsV8f", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4, "V8fIiV16iIiV16iIsV8fIiIiiIiIiiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, "V8fIiV16iIiV16iIsV8fIiIiLiIiIiLiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x32_f16, "V8fIbV16hIbV16hIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x32_f16, "V8hIbV16hIbV16hIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_32x16x128_f4, "V16fV16iV8iIsV16f", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_scale_f32_32x16x128_f4, "V16fV16iV8iIsV16fIiIiiIiIiiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_scale16_f32_32x16x128_f4, "V16fV16iV8iIsV16fIiIiLiIiIiLiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_bf16, "V8fIbV16yIbV32yV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x64_bf16, "V8yIbV16yIbV32yV8yiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16, "V8fIbV16yIbV32yV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbIb.", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-
-// GFX12.5 128B cooperative atomics
-TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_32x4B,  "ii*IicC*",  "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_32x4B, "vi*iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
-
-TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_16x8B,  "V2iV2i*IicC*",  "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_16x8B, "vV2i*V2iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
-
-TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_8x16B,  "V4iV4i*IicC*",  "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
-
-//===----------------------------------------------------------------------===//
-// Image builtins
-//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4xiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4xiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4xiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4xiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4xiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4xiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4xiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4xiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4xiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4xiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4xiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4xiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4xiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4xiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4xiiiiiQtii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f32_f32, "V4fifQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f16_f32, "V4xifQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f32_f32, "V4fiffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f16_f32, "V4xiffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_f32_f32, "fiffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f16_f32, "V4xiffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_f32_f32, "fifffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f16_f32, "V4xifffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4xifffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4xifffQtV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f32_f32, "V4fifQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f16_f32, "V4xifQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32, "V4xiffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_f32_f32, "fiffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f16_f32, "V4xiffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32, "V4xifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f16_f32, "V4xifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f16_f32, "V4xifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f16_f32, "V4xiffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f16_f32, "V4xifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f16_f32, "V4xifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_f32_f32, "fiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f16_f32, "V4xiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f16_f32, "V4xiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f16_f32, "V4xiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f16_f32, "V4xifffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f16_f32, "V4xiffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_f32_f32, "fiffffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f32_f32, "V4fiffffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f16_f32, "V4xiffffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_f32_f32, "fifffffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f32_f32, "V4fifffffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f16_f32, "V4xifffffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f32_f32, "V4fifffffffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f16_f32, "V4xifffffffffQtV4ibii", "nc", "extended-image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts")
-
-#undef BUILTIN
-#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td
new file mode 100644
index 0000000000000..b7d1236549eee
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td
@@ -0,0 +1,1017 @@
+//===--- BuiltinsAMDGPU.td - AMDGPU Builtin function defs -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AMDGPU-specific builtin function database.
+//
+//===----------------------------------------------------------------------===//
+
+include "clang/Basic/BuiltinsBase.td"
+
+//===----------------------------------------------------------------------===//
+// AMDGPU builtin base classes
+//===----------------------------------------------------------------------===//
+
+class AMDGPUBuiltin<string prototype, list<Attribute> Attr = [], string Feat = ""> : TargetBuiltin {
+  let Spellings = [NAME];
+  let Prototype = prototype;
+  let Attributes = !listconcat([NoThrow], Attr);
+  let Features = Feat;
+}
+
+//===----------------------------------------------------------------------===//
+// SI+ only builtins.
+//===----------------------------------------------------------------------===//
+
+def __builtin_amdgcn_dispatch_ptr : AMDGPUBuiltin<"void address_space<4> *()", [Const]>;
+def __builtin_amdgcn_kernarg_segment_ptr : AMDGPUBuiltin<"void address_space<4> *()", [Const]>;
+def __builtin_amdgcn_implicitarg_ptr : AMDGPUBuiltin<"void address_space<4> *()", [Const]>;
+def __builtin_amdgcn_queue_ptr : AMDGPUBuiltin<"void address_space<4> *()", [Const]>;
+
+def __builtin_amdgcn_workgroup_id_x : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_amdgcn_workgroup_id_y : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_amdgcn_workgroup_id_z : AMDGPUBuiltin<"unsigned int()", [Const]>;
+
+def __builtin_amdgcn_cluster_id_x : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_id_y : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_id_z : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+
+def __builtin_amdgcn_cluster_workgroup_id_x : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_workgroup_id_y : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_workgroup_id_z : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_workgroup_flat_id : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+
+def __builtin_amdgcn_cluster_workgroup_max_id_x : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_workgroup_max_id_y : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_workgroup_max_id_z : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_workgroup_max_flat_id : AMDGPUBuiltin<"unsigned int()", [Const], "gfx1250-insts">;
+
+def __builtin_amdgcn_workitem_id_x : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_amdgcn_workitem_id_y : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_amdgcn_workitem_id_z : AMDGPUBuiltin<"unsigned int()", [Const]>;
+
+def __builtin_amdgcn_workgroup_size_x : AMDGPUBuiltin<"unsigned short()", [Const]>;
+def __builtin_amdgcn_workgroup_size_y : AMDGPUBuiltin<"unsigned short()", [Const]>;
+def __builtin_amdgcn_workgroup_size_z : AMDGPUBuiltin<"unsigned short()", [Const]>;
+
+def __builtin_amdgcn_grid_size_x : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_amdgcn_grid_size_y : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_amdgcn_grid_size_z : AMDGPUBuiltin<"unsigned int()", [Const]>;
+
+def __builtin_amdgcn_mbcnt_hi : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_mbcnt_lo : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int)", [Const]>;
+
+def __builtin_amdgcn_s_memtime : AMDGPUBuiltin<"uint64_t()", [], "s-memtime-inst">;
+
+//===----------------------------------------------------------------------===//
+// Instruction builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_s_getreg : AMDGPUBuiltin<"unsigned int(_Constant int)">;
+def __builtin_amdgcn_s_setreg : AMDGPUBuiltin<"void(_Constant int, unsigned int)">;
+def __builtin_amdgcn_s_getpc : AMDGPUBuiltin<"uint64_t()">;
+def __builtin_amdgcn_s_waitcnt : AMDGPUBuiltin<"void(_Constant int)">;
+def __builtin_amdgcn_s_sendmsg : AMDGPUBuiltin<"void(_Constant int, unsigned int)">;
+def __builtin_amdgcn_s_sendmsghalt : AMDGPUBuiltin<"void(_Constant int, unsigned int)">;
+def __builtin_amdgcn_s_barrier : AMDGPUBuiltin<"void()">;
+def __builtin_amdgcn_s_ttracedata : AMDGPUBuiltin<"void(int)">;
+def __builtin_amdgcn_wave_barrier : AMDGPUBuiltin<"void()">;
+def __builtin_amdgcn_sched_barrier : AMDGPUBuiltin<"void(_Constant int)">;
+def __builtin_amdgcn_sched_group_barrier : AMDGPUBuiltin<"void(_Constant int, _Constant int, _Constant int)">;
+def __builtin_amdgcn_iglp_opt : AMDGPUBuiltin<"void(_Constant int)">;
+def __builtin_amdgcn_s_dcache_inv : AMDGPUBuiltin<"void()">;
+def __builtin_amdgcn_buffer_wbinvl1 : AMDGPUBuiltin<"void()">;
+def __builtin_amdgcn_fence : AMDGPUBuiltin<"void(unsigned int, char const *, ...)">;
+def __builtin_amdgcn_groupstaticsize : AMDGPUBuiltin<"unsigned int()">;
+def __builtin_amdgcn_wavefrontsize : AMDGPUBuiltin<"unsigned int()", [Const]>;
+
+def __builtin_amdgcn_atomic_inc32 : AMDGPUBuiltin<"uint32_t(uint32_t volatile *, uint32_t, unsigned int, char const *)">;
+def __builtin_amdgcn_atomic_inc64 : AMDGPUBuiltin<"uint64_t(uint64_t volatile *, uint64_t, unsigned int, char const *)">;
+
+def __builtin_amdgcn_atomic_dec32 : AMDGPUBuiltin<"uint32_t(uint32_t volatile *, uint32_t, unsigned int, char const *)">;
+def __builtin_amdgcn_atomic_dec64 : AMDGPUBuiltin<"uint64_t(uint64_t volatile *, uint64_t, unsigned int, char const *)">;
+
+// FIXME: Need to disallow constant address space.
+def __builtin_amdgcn_div_scale : AMDGPUBuiltin<"double(double, double, bool, bool *)">;
+def __builtin_amdgcn_div_scalef : AMDGPUBuiltin<"float(float, float, bool, bool *)">;
+def __builtin_amdgcn_div_fmas : AMDGPUBuiltin<"double(double, double, double, bool)", [Const]>;
+def __builtin_amdgcn_div_fmasf : AMDGPUBuiltin<"float(float, float, float, bool)", [Const]>;
+def __builtin_amdgcn_div_fixup : AMDGPUBuiltin<"double(double, double, double)", [Const]>;
+def __builtin_amdgcn_div_fixupf : AMDGPUBuiltin<"float(float, float, float)", [Const]>;
+def __builtin_amdgcn_trig_preop : AMDGPUBuiltin<"double(double, int)", [Const]>;
+def __builtin_amdgcn_trig_preopf : AMDGPUBuiltin<"float(float, int)", [Const]>;
+def __builtin_amdgcn_rcp : AMDGPUBuiltin<"double(double)", [Const]>;
+def __builtin_amdgcn_rcpf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_sqrt : AMDGPUBuiltin<"double(double)", [Const]>;
+def __builtin_amdgcn_sqrtf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_rsq : AMDGPUBuiltin<"double(double)", [Const]>;
+def __builtin_amdgcn_rsqf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_rsq_clamp : AMDGPUBuiltin<"double(double)", [Const]>;
+def __builtin_amdgcn_rsq_clampf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_sinf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_cosf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_logf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_exp2f : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_log_clampf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_ldexp : AMDGPUBuiltin<"double(double, int)", [Const]>;
+def __builtin_amdgcn_ldexpf : AMDGPUBuiltin<"float(float, int)", [Const]>;
+def __builtin_amdgcn_frexp_mant : AMDGPUBuiltin<"double(double)", [Const]>;
+def __builtin_amdgcn_frexp_mantf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_frexp_exp : AMDGPUBuiltin<"int(double)", [Const]>;
+def __builtin_amdgcn_frexp_expf : AMDGPUBuiltin<"int(float)", [Const]>;
+def __builtin_amdgcn_fract : AMDGPUBuiltin<"double(double)", [Const]>;
+def __builtin_amdgcn_fractf : AMDGPUBuiltin<"float(float)", [Const]>;
+def __builtin_amdgcn_lerp : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const], "lerp-inst">;
+def __builtin_amdgcn_class : AMDGPUBuiltin<"bool(double, int)", [Const]>;
+def __builtin_amdgcn_classf : AMDGPUBuiltin<"bool(float, int)", [Const]>;
+def __builtin_amdgcn_cubeid : AMDGPUBuiltin<"float(float, float, float)", [Const], "cube-insts">;
+def __builtin_amdgcn_cubesc : AMDGPUBuiltin<"float(float, float, float)", [Const], "cube-insts">;
+def __builtin_amdgcn_cubetc : AMDGPUBuiltin<"float(float, float, float)", [Const], "cube-insts">;
+def __builtin_amdgcn_cubema : AMDGPUBuiltin<"float(float, float, float)", [Const], "cube-insts">;
+def __builtin_amdgcn_s_sleep : AMDGPUBuiltin<"void(_Constant int)">;
+def __builtin_amdgcn_s_incperflevel : AMDGPUBuiltin<"void(_Constant int)">;
+def __builtin_amdgcn_s_decperflevel : AMDGPUBuiltin<"void(_Constant int)">;
+def __builtin_amdgcn_s_setprio : AMDGPUBuiltin<"void(_Constant short)">;
+def __builtin_amdgcn_ds_swizzle : AMDGPUBuiltin<"int(int, _Constant int)", [Const]>;
+def __builtin_amdgcn_ds_permute : AMDGPUBuiltin<"int(int, int)", [Const]>;
+def __builtin_amdgcn_ds_bpermute : AMDGPUBuiltin<"int(int, int)", [Const]>;
+def __builtin_amdgcn_readfirstlane : AMDGPUBuiltin<"int(int)", [Const]>;
+def __builtin_amdgcn_readlane : AMDGPUBuiltin<"int(int, int)", [Const]>;
+def __builtin_amdgcn_fmed3f : AMDGPUBuiltin<"float(float, float, float)", [Const]>;
+def __builtin_amdgcn_ds_faddf : AMDGPUBuiltin<"float(float address_space<3> *, float, _Constant int, _Constant int, _Constant bool)">;
+def __builtin_amdgcn_ds_fminf : AMDGPUBuiltin<"float(float address_space<3> *, float, _Constant int, _Constant int, _Constant bool)">;
+def __builtin_amdgcn_ds_fmaxf : AMDGPUBuiltin<"float(float address_space<3> *, float, _Constant int, _Constant int, _Constant bool)">;
+def __builtin_amdgcn_ds_append : AMDGPUBuiltin<"int(int address_space<3> *)">;
+def __builtin_amdgcn_ds_consume : AMDGPUBuiltin<"int(int address_space<3> *)">;
+def __builtin_amdgcn_alignbit : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_alignbyte : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_ubfe : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_sbfe : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_cvt_pkrtz : AMDGPUBuiltin<"_ExtVector<2, __fp16>(float, float)", [Const]>;
+def __builtin_amdgcn_cvt_pknorm_i16 : AMDGPUBuiltin<"_ExtVector<2, short>(float, float)", [Const], "cvt-pknorm-vop2-insts">;
+def __builtin_amdgcn_cvt_pknorm_u16 : AMDGPUBuiltin<"_ExtVector<2, unsigned short>(float, float)", [Const], "cvt-pknorm-vop2-insts">;
+def __builtin_amdgcn_cvt_pk_i16 : AMDGPUBuiltin<"_ExtVector<2, short>(int, int)", [Const]>;
+def __builtin_amdgcn_cvt_pk_u16 : AMDGPUBuiltin<"_ExtVector<2, unsigned short>(unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_cvt_pk_u8_f32 : AMDGPUBuiltin<"unsigned int(float, unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_cvt_off_f32_i4 : AMDGPUBuiltin<"float(int)", [Const]>;
+def __builtin_amdgcn_msad_u8 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_sad_u8 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const], "sad-insts">;
+def __builtin_amdgcn_sad_hi_u8 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const], "sad-insts">;
+def __builtin_amdgcn_sad_u16 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const], "sad-insts">;
+def __builtin_amdgcn_qsad_pk_u16_u8 : AMDGPUBuiltin<"uint64_t(uint64_t, unsigned int, uint64_t)", [Const], "qsad-insts">;
+def __builtin_amdgcn_mqsad_pk_u16_u8 : AMDGPUBuiltin<"uint64_t(uint64_t, unsigned int, uint64_t)", [Const]>;
+def __builtin_amdgcn_mqsad_u32_u8 : AMDGPUBuiltin<"_Vector<4, unsigned int>(uint64_t, unsigned int, _Vector<4, unsigned int>)", [Const]>;
+
+def __builtin_amdgcn_make_buffer_rsrc : AMDGPUBuiltin<"__amdgpu_buffer_rsrc_t(void *, short, int64_t, int)", [Const]>;
+def __builtin_amdgcn_raw_buffer_store_b8 : AMDGPUBuiltin<"void(unsigned char, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_store_b16 : AMDGPUBuiltin<"void(unsigned short, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_store_b32 : AMDGPUBuiltin<"void(unsigned int, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_store_b64 : AMDGPUBuiltin<"void(_Vector<2, unsigned int>, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_store_b96 : AMDGPUBuiltin<"void(_Vector<3, unsigned int>, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_store_b128 : AMDGPUBuiltin<"void(_Vector<4, unsigned int>, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_load_b8 : AMDGPUBuiltin<"unsigned char(__amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_load_b16 : AMDGPUBuiltin<"unsigned short(__amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_load_b32 : AMDGPUBuiltin<"unsigned int(__amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_load_b64 : AMDGPUBuiltin<"_Vector<2, unsigned int>(__amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_load_b96 : AMDGPUBuiltin<"_Vector<3, unsigned int>(__amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+def __builtin_amdgcn_raw_buffer_load_b128 : AMDGPUBuiltin<"_Vector<4, unsigned int>(__amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+
+def __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32 : AMDGPUBuiltin<"int(int, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">;
+
+def __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32 : AMDGPUBuiltin<"float(float, __amdgpu_buffer_rsrc_t, int, int, _Constant int)", [], "atomic-fadd-rtn-insts">;
+def __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16 : AMDGPUBuiltin<"_Vector<2, _Float16>(_Vector<2, _Float16>, __amdgpu_buffer_rsrc_t, int, int, _Constant int)", [], "atomic-buffer-global-pk-add-f16-insts">;
+
+def __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32 : AMDGPUBuiltin<"float(float, __amdgpu_buffer_rsrc_t, int, int, _Constant int)", [], "atomic-fmin-fmax-global-f32">;
+def __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32 : AMDGPUBuiltin<"float(float, __amdgpu_buffer_rsrc_t, int, int, _Constant int)", [], "atomic-fmin-fmax-global-f32">;
+def __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64 : AMDGPUBuiltin<"double(double, __amdgpu_buffer_rsrc_t, int, int, _Constant int)", [], "atomic-fmin-fmax-global-f64">;
+def __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64 : AMDGPUBuiltin<"double(double, __amdgpu_buffer_rsrc_t, int, int, _Constant int)", [], "atomic-fmin-fmax-global-f64">;
+
+def __builtin_amdgcn_raw_ptr_buffer_load_lds : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, void address_space<3> *, _Constant unsigned int, int, int, _Constant int, _Constant int)", [], "vmem-to-lds-load-insts">;
+def __builtin_amdgcn_struct_ptr_buffer_load_lds : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, void address_space<3> *, _Constant unsigned int, int, int, int, _Constant int, _Constant int)", [], "vmem-to-lds-load-insts">;
+
+//===----------------------------------------------------------------------===//
+// Ballot builtins.
+//===----------------------------------------------------------------------===//
+
+def __builtin_amdgcn_ballot_w32 : AMDGPUBuiltin<"uint32_t(bool)", [Const], "wavefrontsize32">;
+def __builtin_amdgcn_ballot_w64 : AMDGPUBuiltin<"uint64_t(bool)", [Const]>;
+
+def __builtin_amdgcn_inverse_ballot_w32 : AMDGPUBuiltin<"bool(uint32_t)", [Const], "wavefrontsize32">;
+def __builtin_amdgcn_inverse_ballot_w64 : AMDGPUBuiltin<"bool(uint64_t)", [Const], "wavefrontsize64">;
+
+// Deprecated intrinsics in favor of __builtin_amdgcn_ballot_{w32|w64}
+def __builtin_amdgcn_uicmp : AMDGPUBuiltin<"uint64_t(unsigned int, unsigned int, _Constant int)", [Const]>;
+def __builtin_amdgcn_uicmpl : AMDGPUBuiltin<"uint64_t(uint64_t, uint64_t, _Constant int)", [Const]>;
+def __builtin_amdgcn_sicmp : AMDGPUBuiltin<"uint64_t(int, int, _Constant int)", [Const]>;
+def __builtin_amdgcn_sicmpl : AMDGPUBuiltin<"uint64_t(int64_t, int64_t, _Constant int)", [Const]>;
+def __builtin_amdgcn_fcmp : AMDGPUBuiltin<"uint64_t(double, double, _Constant int)", [Const]>;
+def __builtin_amdgcn_fcmpf : AMDGPUBuiltin<"uint64_t(float, float, _Constant int)", [Const]>;
+
+//===----------------------------------------------------------------------===//
+// Flat addressing builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_is_shared : AMDGPUBuiltin<"bool(void const address_space<0> *)", [Const]>;
+def __builtin_amdgcn_is_private : AMDGPUBuiltin<"bool(void const address_space<0> *)", [Const]>;
+
+//===----------------------------------------------------------------------===//
+// GWS builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_ds_gws_init : AMDGPUBuiltin<"void(unsigned int, unsigned int)", [], "gws">;
+def __builtin_amdgcn_ds_gws_barrier : AMDGPUBuiltin<"void(unsigned int, unsigned int)", [], "gws">;
+def __builtin_amdgcn_ds_gws_sema_v : AMDGPUBuiltin<"void(unsigned int)", [], "gws">;
+def __builtin_amdgcn_ds_gws_sema_br : AMDGPUBuiltin<"void(unsigned int, unsigned int)", [], "gws">;
+def __builtin_amdgcn_ds_gws_sema_p : AMDGPUBuiltin<"void(unsigned int)", [], "gws">;
+
+//===----------------------------------------------------------------------===//
+// CI+ only builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_s_dcache_inv_vol : AMDGPUBuiltin<"void()", [], "ci-insts">;
+def __builtin_amdgcn_buffer_wbinvl1_vol : AMDGPUBuiltin<"void()", [], "ci-insts">;
+def __builtin_amdgcn_ds_gws_sema_release_all : AMDGPUBuiltin<"void(unsigned int)", [], "ci-insts">;
+
+//===----------------------------------------------------------------------===//
+// Interpolation builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_interp_p1_f16 : AMDGPUBuiltin<"float(float, unsigned int, unsigned int, bool, unsigned int)", [Const]>;
+def __builtin_amdgcn_interp_p2_f16 : AMDGPUBuiltin<"__fp16(float, float, unsigned int, unsigned int, bool, unsigned int)", [Const]>;
+def __builtin_amdgcn_interp_p1 : AMDGPUBuiltin<"float(float, unsigned int, unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_interp_p2 : AMDGPUBuiltin<"float(float, float, unsigned int, unsigned int, unsigned int)", [Const]>;
+def __builtin_amdgcn_interp_mov : AMDGPUBuiltin<"float(unsigned int, unsigned int, unsigned int, unsigned int)", [Const]>;
+
+//===----------------------------------------------------------------------===//
+// VI+ only builtins.
+//===----------------------------------------------------------------------===//
+
+def __builtin_amdgcn_div_fixuph : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_rcph : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_sqrth : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_rsqh : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_sinh : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_cosh : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_ldexph : AMDGPUBuiltin<"__fp16(__fp16, int)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_frexp_manth : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_frexp_exph : AMDGPUBuiltin<"short(__fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_fracth : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_classh : AMDGPUBuiltin<"bool(__fp16, int)", [Const], "16-bit-insts">;
+def __builtin_amdgcn_s_memrealtime : AMDGPUBuiltin<"uint64_t()", [], "s-memrealtime">;
+def __builtin_amdgcn_mov_dpp : AMDGPUBuiltin<"int(int, _Constant int, _Constant int, _Constant int, _Constant bool)", [Const, CustomTypeChecking], "dpp">;
+def __builtin_amdgcn_update_dpp : AMDGPUBuiltin<"int(int, int, _Constant int, _Constant int, _Constant int, _Constant bool)", [Const, CustomTypeChecking], "dpp">;
+def __builtin_amdgcn_s_dcache_wb : AMDGPUBuiltin<"void()", [], "gfx8-insts">;
+def __builtin_amdgcn_perm : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", [Const], "gfx8-insts">;
+
+//===----------------------------------------------------------------------===//
+// GFX9+ only builtins.
+//===----------------------------------------------------------------------===//
+
+def __builtin_amdgcn_fmed3h : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "gfx9-insts">;
+
+def __builtin_amdgcn_global_atomic_fadd_f64 : AMDGPUBuiltin<"double(double address_space<1> *, double)", [], "gfx90a-insts">;
+def __builtin_amdgcn_global_atomic_fadd_f32 : AMDGPUBuiltin<"float(float address_space<1> *, float)", [], "atomic-fadd-rtn-insts">;
+def __builtin_amdgcn_global_atomic_fadd_v2f16 : AMDGPUBuiltin<"_Vector<2, _Float16>(_Vector<2, _Float16 address_space<1> *>, _Vector<2, _Float16>)", [CustomTypeChecking], "atomic-buffer-global-pk-add-f16-insts">;
+def __builtin_amdgcn_global_atomic_fmin_f64 : AMDGPUBuiltin<"double(double address_space<1> *, double)", [], "gfx90a-insts">;
+def __builtin_amdgcn_global_atomic_fmax_f64 : AMDGPUBuiltin<"double(double address_space<1> *, double)", [], "gfx90a-insts">;
+
+def __builtin_amdgcn_flat_atomic_fadd_f64 : AMDGPUBuiltin<"double(double address_space<0> *, double)", [], "gfx90a-insts">;
+def __builtin_amdgcn_flat_atomic_fmin_f64 : AMDGPUBuiltin<"double(double address_space<0> *, double)", [], "gfx90a-insts">;
+def __builtin_amdgcn_flat_atomic_fmax_f64 : AMDGPUBuiltin<"double(double address_space<0> *, double)", [], "gfx90a-insts">;
+
+def __builtin_amdgcn_ds_atomic_fadd_f64 : AMDGPUBuiltin<"double(double address_space<3> *, double)", [], "gfx90a-insts">;
+def __builtin_amdgcn_ds_atomic_fadd_f32 : AMDGPUBuiltin<"float(float address_space<3> *, float)", [], "gfx8-insts">;
+
+def __builtin_amdgcn_flat_atomic_fadd_f32 : AMDGPUBuiltin<"float(float address_space<0> *, float)", [], "gfx940-insts">;
+def __builtin_amdgcn_flat_atomic_fadd_v2f16 : AMDGPUBuiltin<"_Vector<2, _Float16>(_Vector<2, _Float16 address_space<0> *>, _Vector<2, _Float16>)", [CustomTypeChecking], "atomic-flat-pk-add-16-insts">;
+def __builtin_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short address_space<0> *>, _Vector<2, short>)", [CustomTypeChecking], "atomic-flat-pk-add-16-insts">;
+def __builtin_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short address_space<1> *>, _Vector<2, short>)", [CustomTypeChecking], "atomic-global-pk-add-bf16-inst">;
+def __builtin_amdgcn_ds_atomic_fadd_v2bf16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short address_space<3> *>, _Vector<2, short>)", [CustomTypeChecking], "atomic-ds-pk-add-16-insts">;
+def __builtin_amdgcn_ds_atomic_fadd_v2f16 : AMDGPUBuiltin<"_Vector<2, _Float16>(_Vector<2, _Float16 address_space<3> *>, _Vector<2, _Float16>)", [CustomTypeChecking], "atomic-ds-pk-add-16-insts">;
+def __builtin_amdgcn_load_to_lds : AMDGPUBuiltin<"void(void *, void address_space<3> *, _Constant unsigned int, _Constant int, _Constant unsigned int)", [], "vmem-to-lds-load-insts">;
+def __builtin_amdgcn_global_load_lds : AMDGPUBuiltin<"void(void address_space<1> *, void address_space<3> *, _Constant unsigned int, _Constant int, _Constant unsigned int)", [], "vmem-to-lds-load-insts">;
+
+//===----------------------------------------------------------------------===//
+// Deep learning builtins.
+//===----------------------------------------------------------------------===//
+
+def __builtin_amdgcn_fdot2 : AMDGPUBuiltin<"float(_Vector<2, _Float16>, _Vector<2, _Float16>, float, _Constant bool)", [Const], "dot10-insts">;
+def __builtin_amdgcn_fdot2_f16_f16 : AMDGPUBuiltin<"_Float16(_Vector<2, _Float16>, _Vector<2, _Float16>, _Float16)", [Const], "dot9-insts">;
+def __builtin_amdgcn_fdot2_bf16_bf16 : AMDGPUBuiltin<"short(_Vector<2, short>, _Vector<2, short>, short)", [Const], "dot9-insts">;
+def __builtin_amdgcn_fdot2_f32_bf16 : AMDGPUBuiltin<"float(_Vector<2, short>, _Vector<2, short>, float, _Constant bool)", [Const], "dot12-insts">;
+def __builtin_amdgcn_sdot2 : AMDGPUBuiltin<"int(_Vector<2, short>, _Vector<2, short>, int, _Constant bool)", [Const], "dot2-insts">;
+def __builtin_amdgcn_udot2 : AMDGPUBuiltin<"unsigned int(_Vector<2, unsigned short>, _Vector<2, unsigned short>, unsigned int, _Constant bool)", [Const], "dot2-insts">;
+def __builtin_amdgcn_sdot4 : AMDGPUBuiltin<"int(int, int, int, _Constant bool)", [Const], "dot1-insts">;
+def __builtin_amdgcn_udot4 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int, _Constant bool)", [Const], "dot7-insts">;
+def __builtin_amdgcn_sudot4 : AMDGPUBuiltin<"int(_Constant bool, int, _Constant bool, int, int, _Constant bool)", [Const], "dot8-insts">;
+def __builtin_amdgcn_sdot8 : AMDGPUBuiltin<"int(int, int, int, _Constant bool)", [Const], "dot1-insts">;
+def __builtin_amdgcn_udot8 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int, _Constant bool)", [Const], "dot7-insts">;
+def __builtin_amdgcn_sudot8 : AMDGPUBuiltin<"int(_Constant bool, int, _Constant bool, int, int, _Constant bool)", [Const], "dot8-insts">;
+def __builtin_amdgcn_dot4_f32_fp8_bf8 : AMDGPUBuiltin<"float(unsigned int, unsigned int, float)", [Const], "dot11-insts">;
+def __builtin_amdgcn_dot4_f32_bf8_fp8 : AMDGPUBuiltin<"float(unsigned int, unsigned int, float)", [Const], "dot11-insts">;
+def __builtin_amdgcn_dot4_f32_fp8_fp8 : AMDGPUBuiltin<"float(unsigned int, unsigned int, float)", [Const], "dot11-insts">;
+def __builtin_amdgcn_dot4_f32_bf8_bf8 : AMDGPUBuiltin<"float(unsigned int, unsigned int, float)", [Const], "dot11-insts">;
+def __builtin_amdgcn_fdot2c_f32_bf16 : AMDGPUBuiltin<"float(_Vector<2, __bf16>, _Vector<2, __bf16>, float, _Constant bool)", [Const], "dot13-insts">;
+
+//===----------------------------------------------------------------------===//
+// GFX10+ only builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_permlane16 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int, unsigned int, _Constant bool, _Constant bool)", [Const], "gfx10-insts">;
+def __builtin_amdgcn_permlanex16 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int, unsigned int, _Constant bool, _Constant bool)", [Const], "gfx10-insts">;
+def __builtin_amdgcn_mov_dpp8 : AMDGPUBuiltin<"unsigned int(unsigned int, _Constant unsigned int)", [Const, CustomTypeChecking], "gfx10-insts">;
+def __builtin_amdgcn_s_ttracedata_imm : AMDGPUBuiltin<"void(_Constant short)", [], "gfx10-insts">;
+
+//===----------------------------------------------------------------------===//
+// Raytracing builtins.
+// By default the 1st argument is i32 and the 4th and 5th arguments are float4.
+// The `l` suffix indicates the 1st argument is i64.
+// The `h` suffix indicates the 4th and 5th arguments are half4.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_image_bvh_intersect_ray : AMDGPUBuiltin<"_Vector<4, unsigned int>(unsigned int, float, _Vector<4, float>, _Vector<4, float>, _Vector<4, float>, _Vector<4, unsigned int>)", [Const], "gfx10-insts">;
+def __builtin_amdgcn_image_bvh_intersect_ray_h : AMDGPUBuiltin<"_Vector<4, unsigned int>(unsigned int, float, _Vector<4, float>, _Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<4, unsigned int>)", [Const], "gfx10-insts">;
+def __builtin_amdgcn_image_bvh_intersect_ray_l : AMDGPUBuiltin<"_Vector<4, unsigned int>(uint64_t, float, _Vector<4, float>, _Vector<4, float>, _Vector<4, float>, _Vector<4, unsigned int>)", [Const], "gfx10-insts">;
+def __builtin_amdgcn_image_bvh_intersect_ray_lh : AMDGPUBuiltin<"_Vector<4, unsigned int>(uint64_t, float, _Vector<4, float>, _Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<4, unsigned int>)", [Const], "gfx10-insts">;
+
+
+//===----------------------------------------------------------------------===//
+// GFX11+ only builtins.
+//===----------------------------------------------------------------------===//
+
+// TODO: This is a no-op in wave32. Should the builtin require wavefrontsize64?
+def __builtin_amdgcn_permlane64 : AMDGPUBuiltin<"unsigned int(unsigned int)", [Const], "gfx11-insts">;
+def __builtin_amdgcn_s_wait_event_export_ready : AMDGPUBuiltin<"void()", [], "gfx11-insts">;
+
+//===----------------------------------------------------------------------===//
+// WMMA builtins.
+// Postfix w32 indicates the builtin requires wavefront size of 32.
+// Postfix w64 indicates the builtin requires wavefront size of 64.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_wmma_f32_16x16x16_f16_w32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<8, float>)", [Const], "gfx11-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<16, short>, _Vector<16, short>, _Vector<8, float>)", [Const], "gfx11-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_w32 : AMDGPUBuiltin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32 : AMDGPUBuiltin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32 : AMDGPUBuiltin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32 : AMDGPUBuiltin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, _Vector<4, int>, _Constant bool, _Vector<4, int>, _Vector<8, int>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, _Vector<2, int>, _Constant bool, _Vector<2, int>, _Vector<8, int>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_wmma_f32_16x16x16_f16_w64 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<4, float>)", [Const], "gfx11-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<16, short>, _Vector<16, short>, _Vector<4, float>)", [Const], "gfx11-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_w64 : AMDGPUBuiltin<"_Vector<8, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<8, _Float16>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64 : AMDGPUBuiltin<"_Vector<8, short>(_Vector<16, short>, _Vector<16, short>, _Vector<8, short>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64 : AMDGPUBuiltin<"_Vector<8, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<8, _Float16>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64 : AMDGPUBuiltin<"_Vector<8, short>(_Vector<16, short>, _Vector<16, short>, _Vector<8, short>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64 : AMDGPUBuiltin<"_Vector<4, int>(_Constant bool, _Vector<4, int>, _Constant bool, _Vector<4, int>, _Vector<4, int>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64 : AMDGPUBuiltin<"_Vector<4, int>(_Constant bool, _Vector<2, int>, _Constant bool, _Vector<2, int>, _Vector<4, int>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
+
+def __builtin_amdgcn_s_sendmsg_rtn : AMDGPUBuiltin<"unsigned int(_Constant unsigned int)", [], "gfx11-insts">;
+def __builtin_amdgcn_s_sendmsg_rtnl : AMDGPUBuiltin<"uint64_t(_Constant unsigned int)", [], "gfx11-insts">;
+
+def __builtin_amdgcn_ds_bvh_stack_rtn : AMDGPUBuiltin<"_Vector<2, unsigned int>(unsigned int, unsigned int, _Vector<4, unsigned int>, _Constant int)", [], "gfx11-insts">;
+
+//===----------------------------------------------------------------------===//
+// Special builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_read_exec : AMDGPUBuiltin<"uint64_t()", [Const]>;
+def __builtin_amdgcn_read_exec_lo : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_amdgcn_read_exec_hi : AMDGPUBuiltin<"unsigned int()", [Const]>;
+
+def __builtin_amdgcn_endpgm : AMDGPUBuiltin<"void()", [NoReturn]>;
+
+def __builtin_amdgcn_get_fpenv : AMDGPUBuiltin<"uint64_t()">;
+def __builtin_amdgcn_set_fpenv : AMDGPUBuiltin<"void(uint64_t)">;
+
+//===----------------------------------------------------------------------===//
+
+// Wave Reduction builtins.
+
+//===----------------------------------------------------------------------===//
+
+def __builtin_amdgcn_wave_reduce_add_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_sub_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_min_i32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_min_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_max_i32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_max_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_and_b32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_or_b32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_xor_b32 : AMDGPUBuiltin<"int32_t(int32_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_add_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_sub_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_min_i64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_min_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_max_i64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_max_u64 : AMDGPUBuiltin<"uint64_t(uint64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_and_b64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_or_b64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_xor_b64 : AMDGPUBuiltin<"int64_t(int64_t, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_fadd_f32 : AMDGPUBuiltin<"float(float, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_fsub_f32 : AMDGPUBuiltin<"float(float, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_fmin_f32 : AMDGPUBuiltin<"float(float, int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_fmax_f32 : AMDGPUBuiltin<"float(float, int32_t)", [Const]>;
+
+//===----------------------------------------------------------------------===//
+// R600-NI only builtins.
+//===----------------------------------------------------------------------===//
+
+def __builtin_r600_implicitarg_ptr : AMDGPUBuiltin<"unsigned char address_space<7> *()", [Const]>;
+
+def __builtin_r600_read_tgid_x : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_r600_read_tgid_y : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_r600_read_tgid_z : AMDGPUBuiltin<"unsigned int()", [Const]>;
+
+def __builtin_r600_read_tidig_x : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_r600_read_tidig_y : AMDGPUBuiltin<"unsigned int()", [Const]>;
+def __builtin_r600_read_tidig_z : AMDGPUBuiltin<"unsigned int()", [Const]>;
+
+def __builtin_r600_recipsqrt_ieee : AMDGPUBuiltin<"double(double)", [Const]>;
+def __builtin_r600_recipsqrt_ieeef : AMDGPUBuiltin<"float(float)", [Const]>;
+
+//===----------------------------------------------------------------------===//
+// MFMA builtins.
+//===----------------------------------------------------------------------===//
+
+def __builtin_amdgcn_mfma_f32_32x32x1f32 : AMDGPUBuiltin<"_Vector<32, float>(float, float, _Vector<32, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x1f32 : AMDGPUBuiltin<"_Vector<16, float>(float, float, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_4x4x1f32 : AMDGPUBuiltin<"_Vector<4, float>(float, float, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x2f32 : AMDGPUBuiltin<"_Vector<16, float>(float, float, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x4f32 : AMDGPUBuiltin<"_Vector<4, float>(float, float, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x4f16 : AMDGPUBuiltin<"_Vector<32, float>(_Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<32, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x4f16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_4x4x4f16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x8f16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x16f16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_i32_32x32x4i8 : AMDGPUBuiltin<"_Vector<32, int>(int, int, _Vector<32, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_i32_16x16x4i8 : AMDGPUBuiltin<"_Vector<16, int>(int, int, _Vector<16, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_i32_4x4x4i8 : AMDGPUBuiltin<"_Vector<4, int>(int, int, _Vector<4, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_i32_32x32x8i8 : AMDGPUBuiltin<"_Vector<16, int>(int, int, _Vector<16, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_i32_16x16x16i8 : AMDGPUBuiltin<"_Vector<4, int>(int, int, _Vector<4, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x2bf16 : AMDGPUBuiltin<"_Vector<32, float>(_Vector<2, short>, _Vector<2, short>, _Vector<32, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x2bf16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<2, short>, _Vector<2, short>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_4x4x2bf16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<2, short>, _Vector<2, short>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<2, short>, _Vector<2, short>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<2, short>, _Vector<2, short>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+
+def __builtin_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUBuiltin<"_Vector<32, float>(_Vector<4, short>, _Vector<4, short>, _Vector<32, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, short>, _Vector<4, short>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, short>, _Vector<4, short>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, short>, _Vector<4, short>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, short>, _Vector<4, short>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f64_16x16x4f64 : AMDGPUBuiltin<"_Vector<4, double>(double, double, _Vector<4, double>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f64_4x4x4f64 : AMDGPUBuiltin<"double(double, double, double, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+
+def __builtin_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUBuiltin<"_Vector<4, int>(int64_t, int64_t, _Vector<4, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUBuiltin<"_Vector<16, int>(int64_t, int64_t, _Vector<16, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x8_xf32 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<2, float>, _Vector<2, float>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x4_xf32 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<2, float>, _Vector<2, float>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8 : AMDGPUBuiltin<"_Vector<4, float>(int64_t, int64_t, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8 : AMDGPUBuiltin<"_Vector<4, float>(int64_t, int64_t, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8 : AMDGPUBuiltin<"_Vector<4, float>(int64_t, int64_t, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8 : AMDGPUBuiltin<"_Vector<4, float>(int64_t, int64_t, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8 : AMDGPUBuiltin<"_Vector<16, float>(int64_t, int64_t, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8 : AMDGPUBuiltin<"_Vector<16, float>(int64_t, int64_t, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8 : AMDGPUBuiltin<"_Vector<16, float>(int64_t, int64_t, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8 : AMDGPUBuiltin<"_Vector<16, float>(int64_t, int64_t, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x32_f16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, _Float16>, _Vector<8, _Float16>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x16_f16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, _Float16>, _Vector<8, _Float16>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x32_bf16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, short>, _Vector<8, short>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x16_bf16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, short>, _Vector<8, short>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_smfmac_i32_16x16x64_i8 : AMDGPUBuiltin<"_Vector<4, int>(_Vector<2, int>, _Vector<4, int>, _Vector<4, int>, int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_smfmac_i32_32x32x32_i8 : AMDGPUBuiltin<"_Vector<16, int>(_Vector<2, int>, _Vector<4, int>, _Vector<16, int>, int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<2, int>, _Vector<4, int>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<2, int>, _Vector<4, int>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<2, int>, _Vector<4, int>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<2, int>, _Vector<4, int>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<2, int>, _Vector<4, int>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<2, int>, _Vector<4, int>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<2, int>, _Vector<4, int>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<2, int>, _Vector<4, int>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "fp8-insts">;
+
+def __builtin_amdgcn_cvt_f32_bf8 : AMDGPUBuiltin<"float(int, _Constant int)", [Const], "fp8-conversion-insts">;
+def __builtin_amdgcn_cvt_f32_fp8 : AMDGPUBuiltin<"float(int, _Constant int)", [Const], "fp8-conversion-insts">;
+def __builtin_amdgcn_cvt_f32_fp8_e5m3 : AMDGPUBuiltin<"float(int, _Constant int)", [Const], "fp8e5m3-insts">;
+def __builtin_amdgcn_cvt_pk_f32_bf8 : AMDGPUBuiltin<"_Vector<2, float>(int, _Constant bool)", [Const], "fp8-conversion-insts">;
+def __builtin_amdgcn_cvt_pk_f32_fp8 : AMDGPUBuiltin<"_Vector<2, float>(int, _Constant bool)", [Const], "fp8-conversion-insts">;
+def __builtin_amdgcn_cvt_pk_bf8_f32 : AMDGPUBuiltin<"int(float, float, int, _Constant bool)", [Const], "fp8-conversion-insts">;
+def __builtin_amdgcn_cvt_pk_fp8_f32 : AMDGPUBuiltin<"int(float, float, int, _Constant bool)", [Const], "fp8-conversion-insts">;
+def __builtin_amdgcn_cvt_sr_bf8_f32 : AMDGPUBuiltin<"int(float, int, int, _Constant int)", [Const], "fp8-conversion-insts">;
+def __builtin_amdgcn_cvt_sr_fp8_f32 : AMDGPUBuiltin<"int(float, int, int, _Constant int)", [Const], "fp8-conversion-insts">;
+
+//===----------------------------------------------------------------------===//
+// GFX950 only builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<8, int32_t>, _Vector<8, int32_t>, _Vector<4, float>, _Constant int, _Constant int, _Constant int, int, _Constant int, int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<8, int32_t>, _Vector<8, int32_t>, _Vector<16, float>, _Constant int, _Constant int, _Constant int, int, _Constant int, int)", [Const], "gfx950-insts">;
+
+def __builtin_amdgcn_mfma_f32_16x16x32_f16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x32_bf16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x16_f16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x16_bf16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_mfma_i32_16x16x64_i8 : AMDGPUBuiltin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_mfma_i32_32x32x32_i8 : AMDGPUBuiltin<"_Vector<16, int>(_Vector<4, int>, _Vector<4, int>, _Vector<16, int>, _Constant int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+
+def __builtin_amdgcn_smfmac_f32_16x16x64_f16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<8, _Float16>, _Vector<16, _Float16>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x32_f16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<8, _Float16>, _Vector<16, _Float16>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x64_bf16 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<8, __bf16>, _Vector<16, __bf16>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x32_bf16 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<8, __bf16>, _Vector<16, __bf16>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_i32_16x16x128_i8 : AMDGPUBuiltin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, int>, _Vector<4, int>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_i32_32x32x64_i8 : AMDGPUBuiltin<"_Vector<16, int>(_Vector<4, int>, _Vector<8, int>, _Vector<16, int>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, int>, _Vector<8, int>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, int>, _Vector<8, int>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, int>, _Vector<8, int>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, int>, _Vector<8, int>, _Vector<4, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, int>, _Vector<8, int>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, int>, _Vector<8, int>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, int>, _Vector<8, int>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, int>, _Vector<8, int>, _Vector<16, float>, int, _Constant int, _Constant int)", [Const], "gfx950-insts">;
+
+def __builtin_amdgcn_permlane16_swap : AMDGPUBuiltin<"_Vector<2, unsigned int>(unsigned int, unsigned int, _Constant bool, _Constant bool)", [Const], "permlane16-swap">;
+def __builtin_amdgcn_permlane32_swap : AMDGPUBuiltin<"_Vector<2, unsigned int>(unsigned int, unsigned int, _Constant bool, _Constant bool)", [Const], "permlane32-swap">;
+
+def __builtin_amdgcn_ds_read_tr4_b64_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<3> *>)", [Const], "gfx950-insts">; // NOTE(review): in this group "_Vector<N, T address_space<3> *>" presumably denotes a pointer (address space 3) to an N-element vector of T, matching the return type, not a vector of pointers -- confirm against the prototype parser.
+def __builtin_amdgcn_ds_read_tr6_b96_v3i32 : AMDGPUBuiltin<"_Vector<3, int>(_Vector<3, int address_space<3> *>)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_ds_read_tr8_b64_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<3> *>)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_ds_read_tr16_b64_v4i16 : AMDGPUBuiltin<"_Vector<4, short>(_Vector<4, short address_space<3> *>)", [Const], "gfx950-insts">; // The three tr16_b64 variants differ only in element type (short, __fp16, __bf16).
+def __builtin_amdgcn_ds_read_tr16_b64_v4f16 : AMDGPUBuiltin<"_Vector<4, __fp16>(_Vector<4, __fp16 address_space<3> *>)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_ds_read_tr16_b64_v4bf16 : AMDGPUBuiltin<"_Vector<4, __bf16>(_Vector<4, __bf16 address_space<3> *>)", [Const], "gfx950-insts">;
+
+def __builtin_amdgcn_ashr_pk_i8_i32 : AMDGPUBuiltin<"unsigned short(unsigned int, unsigned int, unsigned int)", [Const], "ashr-pk-insts">;
+def __builtin_amdgcn_ashr_pk_u8_i32 : AMDGPUBuiltin<"unsigned short(unsigned int, unsigned int, unsigned int)", [Const], "ashr-pk-insts">;
+
+def __builtin_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<16, float>, _Vector<16, float>, float)", [Const], "gfx950-insts">;
+def __builtin_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<16, float>, _Vector<16, float>, float)", [Const], "gfx950-insts">;
+
+//===----------------------------------------------------------------------===//
+// GFX12+ only builtins.
+//===----------------------------------------------------------------------===//
+
+def __builtin_amdgcn_s_sleep_var : AMDGPUBuiltin<"void(unsigned int)", [], "gfx12-insts">;
+def __builtin_amdgcn_permlane16_var : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int, _Constant bool, _Constant bool)", [Const], "gfx12-insts">;
+def __builtin_amdgcn_permlanex16_var : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int, _Constant bool, _Constant bool)", [Const], "gfx12-insts">;
+def __builtin_amdgcn_s_barrier_signal : AMDGPUBuiltin<"void(_Constant int)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_barrier_signal_var : AMDGPUBuiltin<"void(void *, int)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_barrier_wait : AMDGPUBuiltin<"void(_Constant short)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_barrier_signal_isfirst : AMDGPUBuiltin<"bool(_Constant int)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_barrier_init : AMDGPUBuiltin<"void(void *, int)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_barrier_join : AMDGPUBuiltin<"void(void *)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_barrier_leave : AMDGPUBuiltin<"void(_Constant short)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_get_barrier_state : AMDGPUBuiltin<"unsigned int(int)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_get_named_barrier_state : AMDGPUBuiltin<"unsigned int(void *)", [], "gfx12-insts">;
+def __builtin_amdgcn_s_prefetch_data : AMDGPUBuiltin<"void(void const *, unsigned int)", [Const], "gfx12-insts">;
+def __builtin_amdgcn_s_buffer_prefetch_data : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, _Constant int, unsigned int)", [Const], "gfx12-insts">;
+
+def __builtin_amdgcn_global_load_tr_b64_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<1> *>)", [Const], "gfx12-insts,wavefrontsize32">; // wavefrontsize32 forms (this def and the next three). NOTE(review): "_Vector<N, T address_space<1> *>" presumably denotes a pointer (address space 1) to an N-element vector of T, not a vector of pointers -- confirm against the prototype parser.
+def __builtin_amdgcn_global_load_tr_b128_v8i16 : AMDGPUBuiltin<"_Vector<8, short>(_Vector<8, short address_space<1> *>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_tr_b128_v8f16 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, __fp16 address_space<1> *>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_tr_b128_v8bf16 : AMDGPUBuiltin<"_Vector<8, __bf16>(_Vector<8, __bf16 address_space<1> *>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_tr_b64_i32 : AMDGPUBuiltin<"int(int address_space<1> *)", [Const], "gfx12-insts,wavefrontsize64">; // wavefrontsize64 forms (this def and the next three): each returns half as many elements as its wavefrontsize32 counterpart above.
+def __builtin_amdgcn_global_load_tr_b128_v4i16 : AMDGPUBuiltin<"_Vector<4, short>(_Vector<4, short address_space<1> *>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_global_load_tr_b128_v4f16 : AMDGPUBuiltin<"_Vector<4, __fp16>(_Vector<4, __fp16 address_space<1> *>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_global_load_tr_b128_v4bf16 : AMDGPUBuiltin<"_Vector<4, __bf16>(_Vector<4, __bf16 address_space<1> *>)", [Const], "gfx12-insts,wavefrontsize64">;
+
+def __builtin_amdgcn_ds_bpermute_fi_b32 : AMDGPUBuiltin<"int(int, int)", [Const], "gfx12-insts">;
+
+// For the following two builtins, the second and third return values of the
+// intrinsics are returned through the last two pointer-type function arguments.
+def __builtin_amdgcn_image_bvh8_intersect_ray : AMDGPUBuiltin<"_Vector<10, unsigned int>(uint64_t, float, unsigned char, _Vector<3, float>, _Vector<3, float>, unsigned int, _Vector<4, unsigned int>, _Vector<3, float *>, _Vector<3, float *>)", [Const], "gfx12-insts">; // NOTE(review): the trailing "_Vector<3, float *>" operands are presumably pointers to a 3 x float vector (the out-parameters described above), not vectors of pointers; also confirm [Const] is intended for builtins that return results through pointer arguments.
+def __builtin_amdgcn_image_bvh_dual_intersect_ray : AMDGPUBuiltin<"_Vector<10, unsigned int>(uint64_t, float, unsigned char, _Vector<3, float>, _Vector<3, float>, _Vector<2, unsigned int>, _Vector<4, unsigned int>, _Vector<3, float *>, _Vector<3, float *>)", [Const], "gfx12-insts">; // Same shape as bvh8_intersect_ray except the sixth operand is _Vector<2, unsigned int> instead of unsigned int.
+
+def __builtin_amdgcn_ds_bvh_stack_push4_pop1_rtn : AMDGPUBuiltin<"_Vector<2, unsigned int>(unsigned int, unsigned int, _Vector<4, unsigned int>, _Constant int)", [], "gfx11-insts">; // Gated on gfx11-insts, unlike the gfx12-insts builtins around it in the GFX12+ section.
+def __builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn : AMDGPUBuiltin<"_Vector<2, unsigned int>(unsigned int, unsigned int, _Vector<8, unsigned int>, _Constant int)", [], "gfx12-insts">; // Takes an 8-element third operand where push4 takes a 4-element one.
+
+// The intrinsic returns {i64, i32}, the builtin returns <2 x i64>.
+// The second return value of the intrinsic is zext'ed.
+def __builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn : AMDGPUBuiltin<"_Vector<2, uint64_t>(unsigned int, unsigned int, _Vector<8, unsigned int>, _Constant int)", [], "gfx12-insts">;
+
+//===----------------------------------------------------------------------===//
+// WMMA builtins.
+// "_w32" in the name indicates the builtin requires a wavefront size of 32.
+// "_w64" in the name indicates the builtin requires a wavefront size of 64.
+//
+// Some of these are very similar to their GFX11 counterparts, but they don't
+// require replication of the A,B matrices, so they use fewer vector elements.
+// Therefore, we add an "_gfx12" suffix to distinguish them from the existing
+// builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, short>, _Vector<8, short>, _Vector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, _Vector<2, int>, _Constant bool, _Vector<2, int>, _Vector<8, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, int, _Constant bool, int, _Vector<8, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
+// These are gfx12-only, but for consistency with the other WMMA variants we're
+// keeping the "_gfx12" suffix.
+def __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, int>, _Vector<2, int>, _Vector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, int>, _Vector<2, int>, _Vector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, int>, _Vector<2, int>, _Vector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, int>, _Vector<2, int>, _Vector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, _Vector<2, int>, _Constant bool, _Vector<2, int>, _Vector<8, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, short>, _Vector<4, short>, _Vector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, _Float16>(_Vector<4, _Float16>, _Vector<4, _Float16>, _Vector<4, _Float16>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>, _Vector<4, short>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, int>(_Constant bool, int, _Constant bool, int, _Vector<4, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, int>(_Constant bool, int, _Constant bool, int, _Vector<4, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
+// These are gfx12-only, but for consistency with the other WMMA variants we're
+// keeping the "_gfx12" suffix.
+def __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, float>(int, int, _Vector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, float>(int, int, _Vector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, float>(int, int, _Vector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, float>(int, int, _Vector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12 : AMDGPUBuiltin<"_Vector<4, int>(_Constant bool, int, _Constant bool, int, _Vector<4, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
+
+def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, __fp16>, _Vector<16, __fp16>, _Vector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, short>, _Vector<16, short>, _Vector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, __fp16>, _Vector<16, __fp16>, _Vector<8, __fp16>, int)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : AMDGPUBuiltin<"_Vector<8, short>(_Vector<8, short>, _Vector<16, short>, _Vector<8, short>, int)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, _Vector<2, int>, _Constant bool, _Vector<4, int>, _Vector<8, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, int, _Constant bool, _Vector<2, int>, _Vector<8, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, _Vector<2, int>, _Constant bool, _Vector<4, int>, _Vector<8, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, int>, _Vector<4, int>, _Vector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, int>, _Vector<4, int>, _Vector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, int>, _Vector<4, int>, _Vector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, int>, _Vector<4, int>, _Vector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, __fp16>, _Vector<8, __fp16>, _Vector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, short>, _Vector<8, short>, _Vector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : AMDGPUBuiltin<"_Vector<4, __fp16>(_Vector<4, __fp16>, _Vector<8, __fp16>, _Vector<4, __fp16>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : AMDGPUBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<8, short>, _Vector<4, short>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : AMDGPUBuiltin<"_Vector<4, int>(_Constant bool, int, _Constant bool, _Vector<2, int>, _Vector<4, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : AMDGPUBuiltin<"_Vector<4, int>(_Constant bool, int, _Constant bool, int, _Vector<4, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : AMDGPUBuiltin<"_Vector<4, int>(_Constant bool, int, _Constant bool, _Vector<2, int>, _Vector<4, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : AMDGPUBuiltin<"_Vector<4, float>(int, _Vector<2, int>, _Vector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : AMDGPUBuiltin<"_Vector<4, float>(int, _Vector<2, int>, _Vector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : AMDGPUBuiltin<"_Vector<4, float>(int, _Vector<2, int>, _Vector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : AMDGPUBuiltin<"_Vector<4, float>(int, _Vector<2, int>, _Vector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+
+def __builtin_amdgcn_prng_b32 : AMDGPUBuiltin<"unsigned int(unsigned int)", [Const], "prng-inst">;
+def __builtin_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, _Float16>, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_bf6_f16 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, _Float16>, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, __bf16>, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_bf6_bf16 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, __bf16>, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_f16_fp8 : AMDGPUBuiltin<"_Vector<2, _Float16>(_Vector<2, _Float16>, int, float, _Constant int, _Constant bool)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_f16_bf8 : AMDGPUBuiltin<"_Vector<2, _Float16>(_Vector<2, _Float16>, int, float, _Constant int, _Constant bool)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_f32_fp8 : AMDGPUBuiltin<"float(int, float, _Constant int)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_f32_bf8 : AMDGPUBuiltin<"float(int, float, _Constant int)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_fp8_f32 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short>, float, float, float, _Constant bool)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_bf8_f32 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short>, float, float, float, _Constant bool)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_f32_fp8 : AMDGPUBuiltin<"_Vector<2, float>(unsigned int, float, _Constant bool)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_f32_bf8 : AMDGPUBuiltin<"_Vector<2, float>(unsigned int, float, _Constant bool)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_fp8_f16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, _Float16>, float, _Constant bool)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_fp8_bf16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, __bf16>, float, _Constant bool)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_bf8_f16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, _Float16>, float, _Constant bool)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_bf8_bf16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, __bf16>, float, _Constant bool)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_f32_fp4 : AMDGPUBuiltin<"_Vector<2, float>(unsigned int, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_fp4_f32 : AMDGPUBuiltin<"unsigned int(unsigned int, float, float, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_f16_fp4 : AMDGPUBuiltin<"_Vector<2, _Float16>(unsigned int, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_bf16_fp4 : AMDGPUBuiltin<"_Vector<2, __bf16>(unsigned int, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_f32_fp6 : AMDGPUBuiltin<"_Vector<32, float>(_Vector<6, unsigned int>, float)", [Const], "fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_f32_bf6 : AMDGPUBuiltin<"_Vector<32, float>(_Vector<6, unsigned int>, float)", [Const], "fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_f16_fp6 : AMDGPUBuiltin<"_Vector<32, _Float16>(_Vector<6, unsigned int>, float)", [Const], "fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_bf16_fp6 : AMDGPUBuiltin<"_Vector<32, __bf16>(_Vector<6, unsigned int>, float)", [Const], "fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_f16_bf6 : AMDGPUBuiltin<"_Vector<32, _Float16>(_Vector<6, unsigned int>, float)", [Const], "fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk32_bf16_bf6 : AMDGPUBuiltin<"_Vector<32, __bf16>(_Vector<6, unsigned int>, float)", [Const], "fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_f16_fp8 : AMDGPUBuiltin<"_Vector<2, _Float16>(unsigned int, float, _Constant bool)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_bf16_fp8 : AMDGPUBuiltin<"_Vector<2, __bf16>(unsigned int, float, _Constant bool)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_f16_bf8 : AMDGPUBuiltin<"_Vector<2, _Float16>(unsigned int, float, _Constant bool)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_bf16_bf8 : AMDGPUBuiltin<"_Vector<2, __bf16>(unsigned int, float, _Constant bool)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_fp4_f16 : AMDGPUBuiltin<"unsigned int(unsigned int, _Vector<2, _Float16>, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk_fp4_bf16 : AMDGPUBuiltin<"unsigned int(unsigned int, _Vector<2, __bf16>, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f16 : AMDGPUBuiltin<"unsigned int(unsigned int, _Vector<2, _Float16>, unsigned int, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_bf16 : AMDGPUBuiltin<"unsigned int(unsigned int, _Vector<2, __bf16>, unsigned int, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32 : AMDGPUBuiltin<"unsigned int(unsigned int, _Vector<2, float>, unsigned int, float, _Constant int)", [Const], "fp4-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_bf8_bf16 : AMDGPUBuiltin<"int(int, __bf16, unsigned int, float, _Constant int)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_bf8_f16 : AMDGPUBuiltin<"int(int, _Float16, unsigned int, float, _Constant int)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_bf8_f32 : AMDGPUBuiltin<"int(int, float, unsigned int, float, _Constant int)", [Const], "bf8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_fp8_bf16 : AMDGPUBuiltin<"int(int, __bf16, unsigned int, float, _Constant int)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_fp8_f16 : AMDGPUBuiltin<"int(int, _Float16, unsigned int, float, _Constant int)", [Const], "fp8-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_fp8_f32 : AMDGPUBuiltin<"int(int, float, unsigned int, float, _Constant int)", [Const], "fp8-cvt-scale-insts">;
+
+def __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, __bf16>, unsigned int, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">; // sr_pk32 group: 32 source elements plus an unsigned int and a float operand, producing _Vector<6, unsigned int>.
+def __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, _Float16>, unsigned int, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, float>, unsigned int, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">; // NOTE(review): f32 source, yet gated on the same f16bf16-to-fp6bf6 feature as the f16/bf16 variants -- confirm against the backend feature definitions.
+def __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, __bf16>, unsigned int, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, _Float16>, unsigned int, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUBuiltin<"_Vector<6, unsigned int>(_Vector<32, float>, unsigned int, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">; // NOTE(review): f32 source under the f16bf16-to-fp6bf6 feature, as for the bf6_f32 variant -- confirm.
+def __builtin_amdgcn_bitop3_b32 : AMDGPUBuiltin<"int(int, int, int, _Constant unsigned int)", [Const], "bitop3-insts">;
+def __builtin_amdgcn_bitop3_b16 : AMDGPUBuiltin<"short(short, short, short, _Constant unsigned int)", [Const], "bitop3-insts">;
+
+def __builtin_amdgcn_cvt_sr_bf16_f32 : AMDGPUBuiltin<"_Vector<2, __bf16>(_Vector<2, __bf16>, float, unsigned int, _Constant bool)", [Const], "f32-to-f16bf16-cvt-sr-insts">;
+def __builtin_amdgcn_cvt_sr_f16_f32 : AMDGPUBuiltin<"_Vector<2, _Float16>(_Vector<2, _Float16>, float, unsigned int, _Constant bool)", [Const], "f32-to-f16bf16-cvt-sr-insts">;
+
+//===----------------------------------------------------------------------===//
+// GFX1250+ only builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_s_cluster_barrier : AMDGPUBuiltin<"void()", [], "gfx1250-insts">;
+
+def __builtin_amdgcn_flat_prefetch : AMDGPUBuiltin<"void(void const address_space<0> *, _Constant int)", [Const], "vmem-pref-insts">; // The explicit address_space<0> is significant: per the porting commit message, AS(0) carries real meaning for the prototype type parser, so do not treat it as redundant.
+def __builtin_amdgcn_global_prefetch : AMDGPUBuiltin<"void(void const address_space<1> *, _Constant int)", [Const], "vmem-pref-insts">; // Same signature as flat_prefetch, but the pointer is in address_space<1>.
+
+def __builtin_amdgcn_global_load_monitor_b32 : AMDGPUBuiltin<"int(int address_space<1> *, _Constant int)", [Const], "gfx1250-insts">; // global_* variants take address_space<1> pointers; the flat_* variants below take explicit address_space<0> pointers.
+def __builtin_amdgcn_global_load_monitor_b64 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<1> *>, _Constant int)", [Const], "gfx1250-insts">; // NOTE(review): "_Vector<N, int address_space<K> *>" presumably denotes a pointer to an N x int vector (matching the return type), not a vector of pointers -- confirm against the prototype parser.
+def __builtin_amdgcn_global_load_monitor_b128 : AMDGPUBuiltin<"_Vector<4, int>(_Vector<4, int address_space<1> *>, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_flat_load_monitor_b32 : AMDGPUBuiltin<"int(int address_space<0> *, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_flat_load_monitor_b64 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<0> *>, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_flat_load_monitor_b128 : AMDGPUBuiltin<"_Vector<4, int>(_Vector<4, int address_space<0> *>, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cluster_load_b32 : AMDGPUBuiltin<"int(int address_space<1> *, _Constant int, int)", [Const], "mcast-load-insts,wavefrontsize32">;
+def __builtin_amdgcn_cluster_load_b64 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<1> *>, _Constant int, int)", [Const], "mcast-load-insts,wavefrontsize32">;
+def __builtin_amdgcn_cluster_load_b128 : AMDGPUBuiltin<"_Vector<4, int>(_Vector<4, int address_space<1> *>, _Constant int, int)", [Const], "mcast-load-insts,wavefrontsize32">;
+def __builtin_amdgcn_cluster_load_async_to_lds_b8 : AMDGPUBuiltin<"void(char address_space<1> *, char address_space<3> *, _Constant int, _Constant int, int)", [Const], "mcast-load-insts,wavefrontsize32">;
+def __builtin_amdgcn_cluster_load_async_to_lds_b32 : AMDGPUBuiltin<"void(int address_space<1> *, int address_space<3> *, _Constant int, _Constant int, int)", [Const], "mcast-load-insts,wavefrontsize32">;
+def __builtin_amdgcn_cluster_load_async_to_lds_b64 : AMDGPUBuiltin<"void(_Vector<2, int address_space<1> *>, _Vector<2, int address_space<3> *>, _Constant int, _Constant int, int)", [Const], "mcast-load-insts,wavefrontsize32">;
+def __builtin_amdgcn_cluster_load_async_to_lds_b128 : AMDGPUBuiltin<"void(_Vector<4, int address_space<1> *>, _Vector<4, int address_space<3> *>, _Constant int, _Constant int, int)", [Const], "mcast-load-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_async_to_lds_b8 : AMDGPUBuiltin<"void(char address_space<1> *, char address_space<3> *, _Constant int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_global_load_async_to_lds_b32 : AMDGPUBuiltin<"void(int address_space<1> *, int address_space<3> *, _Constant int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_global_load_async_to_lds_b64 : AMDGPUBuiltin<"void(_Vector<2, int address_space<1> *>, _Vector<2, int address_space<3> *>, _Constant int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_global_load_async_to_lds_b128 : AMDGPUBuiltin<"void(_Vector<4, int address_space<1> *>, _Vector<4, int address_space<3> *>, _Constant int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_global_store_async_from_lds_b8 : AMDGPUBuiltin<"void(char address_space<1> *, char address_space<3> *, _Constant int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_global_store_async_from_lds_b32 : AMDGPUBuiltin<"void(int address_space<1> *, int address_space<3> *, _Constant int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_global_store_async_from_lds_b64 : AMDGPUBuiltin<"void(_Vector<2, int address_space<1> *>, _Vector<2, int address_space<3> *>, _Constant int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_global_store_async_from_lds_b128 : AMDGPUBuiltin<"void(_Vector<4, int address_space<1> *>, _Vector<4, int address_space<3> *>, _Constant int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_ds_atomic_async_barrier_arrive_b64 : AMDGPUBuiltin<"void(long int address_space<3> *)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64 : AMDGPUBuiltin<"long int(long int address_space<3> *, long int)", [Const], "gfx1250-insts">;
+// Tensor load/store builtins ("gfx1250-insts"): the full forms take four int-vector operands plus a _Constant int; the _d2 forms take only the first two vectors.
+def __builtin_amdgcn_tensor_load_to_lds : AMDGPUBuiltin<"void(_Vector<4, int>, _Vector<8, int>, _Vector<4, int>, _Vector<4, int>, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_tensor_load_to_lds_d2 : AMDGPUBuiltin<"void(_Vector<4, int>, _Vector<8, int>, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_tensor_store_from_lds : AMDGPUBuiltin<"void(_Vector<4, int>, _Vector<8, int>, _Vector<4, int>, _Vector<4, int>, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_tensor_store_from_lds_d2 : AMDGPUBuiltin<"void(_Vector<4, int>, _Vector<8, int>, _Constant int)", [Const], "gfx1250-insts">;
+// load_tr* builtins for address spaces 1 (global_*) and 3 (ds_*); all need wavefrontsize32. tr4/tr6 forms also need "transpose-load-f4f6-insts", tr8/tr16 forms "gfx1250-insts".
+def __builtin_amdgcn_global_load_tr4_b64_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<1> *>)", [Const], "transpose-load-f4f6-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_tr8_b64_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<1> *>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_tr6_b96_v3i32 : AMDGPUBuiltin<"_Vector<3, int>(_Vector<3, int address_space<1> *>)", [Const], "transpose-load-f4f6-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_tr16_b128_v8i16 : AMDGPUBuiltin<"_Vector<8, short>(_Vector<8, short address_space<1> *>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_tr16_b128_v8f16 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, __fp16 address_space<1> *>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_global_load_tr16_b128_v8bf16 : AMDGPUBuiltin<"_Vector<8, __bf16>(_Vector<8, __bf16 address_space<1> *>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_ds_load_tr4_b64_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<3> *>)", [Const], "transpose-load-f4f6-insts,wavefrontsize32">;
+def __builtin_amdgcn_ds_load_tr8_b64_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<3> *>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_ds_load_tr6_b96_v3i32 : AMDGPUBuiltin<"_Vector<3, int>(_Vector<3, int address_space<3> *>)", [Const], "transpose-load-f4f6-insts,wavefrontsize32">;
+def __builtin_amdgcn_ds_load_tr16_b128_v8i16 : AMDGPUBuiltin<"_Vector<8, short>(_Vector<8, short address_space<3> *>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_ds_load_tr16_b128_v8f16 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, __fp16 address_space<3> *>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_ds_load_tr16_b128_v8bf16 : AMDGPUBuiltin<"_Vector<8, __bf16>(_Vector<8, __bf16 address_space<3> *>)", [Const], "gfx1250-insts,wavefrontsize32">;
+// Scalar builtins with an empty attribute list (not Const), one operand each: a _Constant short for s_setprio_inc_wg/s_monitor_sleep, a void * for s_wakeup_barrier.
+def __builtin_amdgcn_s_setprio_inc_wg : AMDGPUBuiltin<"void(_Constant short)", [], "setprio-inc-wg-inst">;
+def __builtin_amdgcn_s_monitor_sleep : AMDGPUBuiltin<"void(_Constant short)", [], "gfx1250-insts">;
+def __builtin_amdgcn_s_wakeup_barrier : AMDGPUBuiltin<"void(void *)", [], "s-wakeup-barrier-inst">;
+// s_wait_* builtins: a single _Constant unsigned short operand, empty attribute list, gated on "gfx1250-insts".
+def __builtin_amdgcn_s_wait_asynccnt : AMDGPUBuiltin<"void(_Constant unsigned short)", [], "gfx1250-insts">;
+def __builtin_amdgcn_s_wait_tensorcnt : AMDGPUBuiltin<"void(_Constant unsigned short)", [], "gfx1250-insts">;
+// Unary math builtins: tanhf/tanhh need "tanh-insts"; every *_bf16 form (tanh_bf16 included) needs "bf16-trans-insts".
+def __builtin_amdgcn_tanhf : AMDGPUBuiltin<"float(float)", [Const], "tanh-insts">;
+def __builtin_amdgcn_tanhh : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "tanh-insts">;
+def __builtin_amdgcn_tanh_bf16 : AMDGPUBuiltin<"__bf16(__bf16)", [Const], "bf16-trans-insts">;
+def __builtin_amdgcn_rcp_bf16 : AMDGPUBuiltin<"__bf16(__bf16)", [Const], "bf16-trans-insts">;
+def __builtin_amdgcn_sqrt_bf16 : AMDGPUBuiltin<"__bf16(__bf16)", [Const], "bf16-trans-insts">;
+def __builtin_amdgcn_rsq_bf16 : AMDGPUBuiltin<"__bf16(__bf16)", [Const], "bf16-trans-insts">;
+def __builtin_amdgcn_log_bf16 : AMDGPUBuiltin<"__bf16(__bf16)", [Const], "bf16-trans-insts">;
+def __builtin_amdgcn_exp2_bf16 : AMDGPUBuiltin<"__bf16(__bf16)", [Const], "bf16-trans-insts">;
+def __builtin_amdgcn_sin_bf16 : AMDGPUBuiltin<"__bf16(__bf16)", [Const], "bf16-trans-insts">;
+def __builtin_amdgcn_cos_bf16 : AMDGPUBuiltin<"__bf16(__bf16)", [Const], "bf16-trans-insts">;
+// Conversion (cvt_*) and sat_pk4_* builtins. All are [Const] and need "gfx1250-insts", except cvt_sr_pk_bf16_f32 ("bf16-cvt-insts") and the two *_e5m3 forms ("fp8e5m3-insts").
+def __builtin_amdgcn_cvt_sr_pk_bf16_f32 : AMDGPUBuiltin<"_Vector<2, __bf16>(float, float, int)", [Const], "bf16-cvt-insts">;
+def __builtin_amdgcn_cvt_sr_pk_f16_f32 : AMDGPUBuiltin<"_Vector<2, __fp16>(float, float, int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_f16_fp8 : AMDGPUBuiltin<"__fp16(int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_f16_bf8 : AMDGPUBuiltin<"__fp16(int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_pk_f16_fp8 : AMDGPUBuiltin<"_Vector<2, __fp16>(short)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_pk_f16_bf8 : AMDGPUBuiltin<"_Vector<2, __fp16>(short)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_pk_fp8_f16 : AMDGPUBuiltin<"short(_Vector<2, __fp16>)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_pk_bf8_f16 : AMDGPUBuiltin<"short(_Vector<2, __fp16>)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_sr_fp8_f16 : AMDGPUBuiltin<"int(__fp16, int, unsigned int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_sr_bf8_f16 : AMDGPUBuiltin<"int(__fp16, int, unsigned int, _Constant int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_f16_fp8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<2, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_bf16_fp8 : AMDGPUBuiltin<"_Vector<8, __bf16>(_Vector<2, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_f16_bf8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<2, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_bf16_bf8 : AMDGPUBuiltin<"_Vector<8, __bf16>(_Vector<2, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_f16_fp4 : AMDGPUBuiltin<"_Vector<8, __fp16>(unsigned int, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_bf16_fp4 : AMDGPUBuiltin<"_Vector<8, __bf16>(unsigned int, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_f32_fp8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_f32_bf8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<2, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk8_f32_fp4 : AMDGPUBuiltin<"_Vector<8, float>(unsigned int, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk16_f16_fp6 : AMDGPUBuiltin<"_Vector<16, __fp16>(_Vector<3, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk16_bf16_fp6 : AMDGPUBuiltin<"_Vector<16, __bf16>(_Vector<3, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk16_f16_bf6 : AMDGPUBuiltin<"_Vector<16, __fp16>(_Vector<3, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk16_bf16_bf6 : AMDGPUBuiltin<"_Vector<16, __bf16>(_Vector<3, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk16_f32_fp6 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<3, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scale_pk16_f32_bf6 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<3, unsigned int>, unsigned int, _Constant unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_fp8_bf16 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, __bf16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_bf8_bf16 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, __bf16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_fp8_f16 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, __fp16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_bf8_f16 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, __fp16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_fp8_f32 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, float>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_bf8_f32 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, float>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_fp4_f32 : AMDGPUBuiltin<"unsigned int(_Vector<8, float>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_fp4_f16 : AMDGPUBuiltin<"unsigned int(_Vector<8, __fp16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk8_fp4_bf16 : AMDGPUBuiltin<"unsigned int(_Vector<8, __bf16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk16_fp6_f32 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, float>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk16_bf6_f32 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, float>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk16_fp6_f16 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, __fp16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk16_bf6_f16 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, __fp16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk16_fp6_bf16 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, __bf16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_pk16_bf6_bf16 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, __bf16>, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_fp8_bf16 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, __bf16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_bf8_bf16 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, __bf16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_fp8_f16 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, __fp16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_bf8_f16 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, __fp16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_fp8_f32 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, float>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_bf8_f32 : AMDGPUBuiltin<"_Vector<2, unsigned int>(_Vector<8, float>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_f32 : AMDGPUBuiltin<"unsigned int(_Vector<8, float>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_f16 : AMDGPUBuiltin<"unsigned int(_Vector<8, __fp16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_bf16 : AMDGPUBuiltin<"unsigned int(_Vector<8, __bf16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, __bf16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_f16 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, __fp16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_f32 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, float>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_bf16 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, __bf16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_f16 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, __fp16>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_f32 : AMDGPUBuiltin<"_Vector<3, unsigned int>(_Vector<16, float>, unsigned int, float)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_cvt_pk_fp8_f32_e5m3 : AMDGPUBuiltin<"int(float, float, int, _Constant bool)", [Const], "fp8e5m3-insts">;
+def __builtin_amdgcn_cvt_sr_fp8_f32_e5m3 : AMDGPUBuiltin<"int(float, int, int, _Constant int)", [Const], "fp8e5m3-insts">;
+def __builtin_amdgcn_sat_pk4_i4_i8 : AMDGPUBuiltin<"unsigned short(unsigned int)", [Const], "gfx1250-insts">;
+def __builtin_amdgcn_sat_pk4_u4_u8 : AMDGPUBuiltin<"unsigned short(unsigned int)", [Const], "gfx1250-insts">;
+// permlane_* builtins: int-only signatures, [Const], "gfx1250-insts,wavefrontsize32"; permlane_idx_gen takes two operands, the others three.
+def __builtin_amdgcn_permlane_bcast : AMDGPUBuiltin<"int(int, int, int)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_permlane_up : AMDGPUBuiltin<"int(int, int, int)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_permlane_down : AMDGPUBuiltin<"int(int, int, int)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_permlane_xor : AMDGPUBuiltin<"int(int, int, int)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_permlane_idx_gen : AMDGPUBuiltin<"int(int, int)", [Const], "gfx1250-insts,wavefrontsize32">;
+// perm_pk16_* builtins ("tensor-cvt-lut-insts"): the result grows from 2 to 3 to 4 unsigned ints across the b4/b6/b8 forms; the last operand is always a 2-element unsigned int vector.
+def __builtin_amdgcn_perm_pk16_b4_u4 : AMDGPUBuiltin<"_Vector<2, unsigned int>(unsigned int, unsigned int, _Vector<2, unsigned int>)", [Const], "tensor-cvt-lut-insts">;
+def __builtin_amdgcn_perm_pk16_b6_u4 : AMDGPUBuiltin<"_Vector<3, unsigned int>(unsigned int, unsigned long int, _Vector<2, unsigned int>)", [Const], "tensor-cvt-lut-insts">;
+def __builtin_amdgcn_perm_pk16_b8_u4 : AMDGPUBuiltin<"_Vector<4, unsigned int>(unsigned long int, unsigned long int, _Vector<2, unsigned int>)", [Const], "tensor-cvt-lut-insts">;
+// add_min/add_max builtins: scalar int/unsigned int forms need "add-min-max-insts", 2-element short-vector (pk_) forms "pk-add-min-max-insts"; the last operand is a _Constant bool.
+def __builtin_amdgcn_add_max_i32 : AMDGPUBuiltin<"int(int, int, int, _Constant bool)", [Const], "add-min-max-insts">;
+def __builtin_amdgcn_add_max_u32 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int, _Constant bool)", [Const], "add-min-max-insts">;
+def __builtin_amdgcn_add_min_i32 : AMDGPUBuiltin<"int(int, int, int, _Constant bool)", [Const], "add-min-max-insts">;
+def __builtin_amdgcn_add_min_u32 : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int, _Constant bool)", [Const], "add-min-max-insts">;
+def __builtin_amdgcn_pk_add_max_i16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>, _Vector<2, short>, _Constant bool)", [Const], "pk-add-min-max-insts">;
+def __builtin_amdgcn_pk_add_max_u16 : AMDGPUBuiltin<"_Vector<2, unsigned short>(_Vector<2, unsigned short>, _Vector<2, unsigned short>, _Vector<2, unsigned short>, _Constant bool)", [Const], "pk-add-min-max-insts">;
+def __builtin_amdgcn_pk_add_min_i16 : AMDGPUBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>, _Vector<2, short>, _Constant bool)", [Const], "pk-add-min-max-insts">;
+def __builtin_amdgcn_pk_add_min_u16 : AMDGPUBuiltin<"_Vector<2, unsigned short>(_Vector<2, unsigned short>, _Vector<2, unsigned short>, _Vector<2, unsigned short>, _Constant bool)", [Const], "pk-add-min-max-insts">;
+
+// GFX1250 WMMA and SWMMAC builtins; every entry is [Const] and requires "gfx1250-insts,wavefrontsize32". The two *_iu8 forms end in "..." (variadic).
+def __builtin_amdgcn_wmma_f32_16x16x4_f32 : AMDGPUBuiltin<"_Vector<8, float>(_Constant bool, _Vector<2, float>, _Constant bool, _Vector<2, float>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x32_bf16 : AMDGPUBuiltin<"_Vector<8, float>(_Constant bool, _Vector<16, __bf16>, _Constant bool, _Vector<16, __bf16>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_bf16_16x16x32_bf16 : AMDGPUBuiltin<"_Vector<8, __bf16>(_Constant bool, _Vector<16, __bf16>, _Constant bool, _Vector<16, __bf16>, _Constant short, _Vector<8, __bf16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_bf16f32_16x16x32_bf16 : AMDGPUBuiltin<"_Vector<8, __bf16>(_Constant bool, _Vector<16, __bf16>, _Constant bool, _Vector<16, __bf16>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, int>, _Vector<8, int>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, int>, _Vector<8, int>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, int>, _Vector<8, int>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, int>, _Vector<8, int>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, int>, _Vector<8, int>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, int>, _Vector<8, int>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, int>, _Vector<8, int>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, int>, _Vector<8, int>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, _Vector<8, int>, _Constant bool, _Vector<8, int>, _Vector<8, int>, _Constant bool, _Constant bool, ...)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<16, int>, _Vector<16, int>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<16, int>, _Vector<16, int>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<16, int>, _Vector<16, int>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<16, int>, _Vector<16, int>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x128_f8f6f4 : AMDGPUBuiltin<"_Vector<8, float>(_Constant int, _Vector<16, int>, _Constant int, _Vector<16, int>, _Constant short, _Vector<8, float>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<16, int>, _Vector<16, int>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<16, int>, _Vector<16, int>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<16, int>, _Vector<16, int>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<16, int>, _Vector<16, int>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_scale_f32_16x16x128_f8f6f4 : AMDGPUBuiltin<"_Vector<8, float>(_Constant int, _Vector<16, int>, _Constant int, _Vector<16, int>, _Constant short, _Vector<8, float>, _Constant int, _Constant int, int, _Constant int, _Constant int, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4 : AMDGPUBuiltin<"_Vector<8, float>(_Constant int, _Vector<16, int>, _Constant int, _Vector<16, int>, _Constant short, _Vector<8, float>, _Constant int, _Constant int, long int, _Constant int, _Constant int, long int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x32_f16 : AMDGPUBuiltin<"_Vector<8, float>(_Constant bool, _Vector<16, __fp16>, _Constant bool, _Vector<16, __fp16>, _Constant short, _Vector<8, float>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x32_f16 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Constant bool, _Vector<16, __fp16>, _Constant bool, _Vector<16, __fp16>, _Constant short, _Vector<8, __fp16>, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_32x16x128_f4 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<16, int>, _Vector<8, int>, _Constant short, _Vector<16, float>)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_scale_f32_32x16x128_f4 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<16, int>, _Vector<8, int>, _Constant short, _Vector<16, float>, _Constant int, _Constant int, int, _Constant int, _Constant int, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_scale16_f32_32x16x128_f4 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<16, int>, _Vector<8, int>, _Constant short, _Vector<16, float>, _Constant int, _Constant int, long int, _Constant int, _Constant int, long int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUBuiltin<"_Vector<8, float>(_Constant bool, _Vector<16, __bf16>, _Constant bool, _Vector<32, __bf16>, _Vector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_bf16_16x16x64_bf16 : AMDGPUBuiltin<"_Vector<8, __bf16>(_Constant bool, _Vector<16, __bf16>, _Constant bool, _Vector<32, __bf16>, _Vector<8, __bf16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16 : AMDGPUBuiltin<"_Vector<8, float>(_Constant bool, _Vector<16, __bf16>, _Constant bool, _Vector<32, __bf16>, _Vector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, int>, _Vector<16, int>, _Vector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, int>, _Vector<16, int>, _Vector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, int>, _Vector<16, int>, _Vector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<8, int>, _Vector<16, int>, _Vector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, int>, _Vector<16, int>, _Vector<8, __fp16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, int>, _Vector<16, int>, _Vector<8, __fp16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, int>, _Vector<16, int>, _Vector<8, __fp16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Vector<8, int>, _Vector<16, int>, _Vector<8, __fp16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUBuiltin<"_Vector<8, int>(_Constant bool, _Vector<8, int>, _Constant bool, _Vector<16, int>, _Vector<8, int>, int, _Constant bool, _Constant bool, ...)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUBuiltin<"_Vector<8, float>(_Constant bool, _Vector<16, __fp16>, _Constant bool, _Vector<32, __fp16>, _Vector<8, float>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x64_f16 : AMDGPUBuiltin<"_Vector<8, __fp16>(_Constant bool, _Vector<16, __fp16>, _Constant bool, _Vector<32, __fp16>, _Vector<8, __fp16>, int, _Constant bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+
+// GFX12.5 ("gfx1250-insts,wavefrontsize32") 128B cooperative atomics: each NxMB suffix multiplies out to 128 bytes (32x4B, 16x8B, 8x16B).
+def __builtin_amdgcn_cooperative_atomic_load_32x4B : AMDGPUBuiltin<"int(int *, _Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_cooperative_atomic_store_32x4B : AMDGPUBuiltin<"void(int *, int, _Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_cooperative_atomic_load_16x8B : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int *>, _Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_cooperative_atomic_store_16x8B : AMDGPUBuiltin<"void(_Vector<2, int *>, _Vector<2, int>, _Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_cooperative_atomic_load_8x16B : AMDGPUBuiltin<"_Vector<4, int>(_Vector<4, int *>, _Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_cooperative_atomic_store_8x16B : AMDGPUBuiltin<"void(_Vector<4, int *>, _Vector<4, int>, _Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">;
+
+//===----------------------------------------------------------------------===//
+// Image builtins
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_image_load_1d_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_1d_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_1darray_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_1darray_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_2d_f32_i32 : AMDGPUBuiltin<"float(int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_2d_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_2d_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_2darray_f32_i32 : AMDGPUBuiltin<"float(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_2darray_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_2darray_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_3d_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_3d_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_cube_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_cube_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_1d_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_1d_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_1darray_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_1darray_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_2d_f32_i32 : AMDGPUBuiltin<"float(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_2d_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_2d_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_2darray_f32_i32 : AMDGPUBuiltin<"float(int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_2darray_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_2darray_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_3d_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_3d_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_cube_v4f32_i32 : AMDGPUBuiltin<"_Vector<4, float>(int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_load_mip_cube_v4f16_i32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_1d_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_1d_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_1darray_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_1darray_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_2d_f32_i32 : AMDGPUBuiltin<"void(float, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_2d_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_2d_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_2darray_f32_i32 : AMDGPUBuiltin<"void(float, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_2darray_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_2darray_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_3d_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_3d_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_cube_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_cube_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_1d_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_1d_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_1darray_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_1darray_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_2d_f32_i32 : AMDGPUBuiltin<"void(float, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_2d_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_2d_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_2darray_f32_i32 : AMDGPUBuiltin<"void(float, int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_2darray_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_2darray_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_3d_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_3d_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_cube_v4f32_i32 : AMDGPUBuiltin<"void(_Vector<4, float>, int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_store_mip_cube_v4f16_i32 : AMDGPUBuiltin<"void(_Vector<4, _Float16>, int, int, int, int, int, __amdgpu_texture_t, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_1d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_1d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_1darray_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_1darray_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_2d_f32_f32 : AMDGPUBuiltin<"float(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_2d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_2d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_2darray_f32_f32 : AMDGPUBuiltin<"float(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_2darray_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_2darray_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_3d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_3d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_cube_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_cube_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "image-insts">;
+def __builtin_amdgcn_image_sample_lz_1d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_1d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_2d_f32_f32 : AMDGPUBuiltin<"float(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_2d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_2d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_2darray_f32_f32 : AMDGPUBuiltin<"float(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_3d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_3d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_cube_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_lz_cube_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_1d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_1d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_1darray_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_1darray_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_2d_f32_f32 : AMDGPUBuiltin<"float(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_2d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_2d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_2darray_f32_f32 : AMDGPUBuiltin<"float(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_2darray_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_2darray_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_3d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_3d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_cube_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_l_cube_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_1d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_1d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_1darray_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_1darray_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_2d_f32_f32 : AMDGPUBuiltin<"float(int, float, float, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_2d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_2d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_2darray_f32_f32 : AMDGPUBuiltin<"float(int, float, float, float, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_2darray_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_2darray_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_3d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, float, float, float, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_sample_d_3d_v4f16_f32 : AMDGPUBuiltin<"_Vector<4, _Float16>(int, float, float, float, float, float, float, float, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
+def __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32 : AMDGPUBuiltin<"_Vector<4, float>(int, float, float, __amdgpu_texture_t, _Vector<4, int>, bool, int, int)", [Const], "extended-image-insts">;
diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index cfd165e6fa7e1..2412b021eb80a 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -90,6 +90,10 @@ clang_tablegen(Builtins.inc -gen-clang-builtins
   SOURCE Builtins.td
   TARGET ClangBuiltins)
 
+clang_tablegen(BuiltinsAMDGPU.inc -gen-clang-builtins
+  SOURCE BuiltinsAMDGPU.td
+  TARGET ClangBuiltinsAMDGPU)
+
 clang_tablegen(BuiltinsBPF.inc -gen-clang-builtins
   SOURCE BuiltinsBPF.td
   TARGET ClangBuiltinsBPF)
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index d5e8299f8b080..94ae62e80b65b 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -135,8 +135,9 @@ namespace clang {
   namespace AMDGPU {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
-  #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-  #include "clang/Basic/BuiltinsAMDGPU.def"
+#define GET_BUILTIN_ENUMERATORS
+#include "clang/Basic/BuiltinsAMDGPU.inc"
+#undef GET_BUILTIN_ENUMERATORS
     LastTSBuiltin
   };
   }
diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap
index a11c8683c601e..f7d3b3e4fe9dc 100644
--- a/clang/include/module.modulemap
+++ b/clang/include/module.modulemap
@@ -43,7 +43,6 @@ module Clang_Basic {
   textual header "clang/Basic/BuiltinsAArch64.def"
   textual header "clang/Basic/BuiltinsAArch64NeonSVEBridge.def"
   textual header "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
-  textual header "clang/Basic/BuiltinsAMDGPU.def"
   textual header "clang/Basic/BuiltinsARM.def"
   textual header "clang/Basic/BuiltinsHexagonMapCustomDep.def"
   textual header "clang/Basic/BuiltinsLoongArchBase.def"
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 366cf7c440060..5df6134b72499 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -90,18 +90,16 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
 static constexpr int NumBuiltins =
     clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;
 
-static constexpr llvm::StringTable BuiltinStrings =
-    CLANG_BUILTIN_STR_TABLE_START
-#define BUILTIN CLANG_BUILTIN_STR_TABLE
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE
-#include "clang/Basic/BuiltinsAMDGPU.def"
-    ;
-
-static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>({
-#define BUILTIN CLANG_BUILTIN_ENTRY
-#define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY
-#include "clang/Basic/BuiltinsAMDGPU.def"
-});
+#define GET_BUILTIN_STR_TABLE
+#include "clang/Basic/BuiltinsAMDGPU.inc"
+#undef GET_BUILTIN_STR_TABLE
+
+static constexpr Builtin::Info BuiltinInfos[] = {
+#define GET_BUILTIN_INFOS
+#include "clang/Basic/BuiltinsAMDGPU.inc"
+#undef GET_BUILTIN_INFOS
+};
+static_assert(std::size(BuiltinInfos) == NumBuiltins);
 
 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",



More information about the cfe-commits mailing list