[clang] [llvm] [Clang][AMDGPU] Add builtins for some buffer resource atomics (PR #149216)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 1 11:11:23 PDT 2025


https://github.com/zGoldthorpe updated https://github.com/llvm/llvm-project/pull/149216

>From c82bff38f45c65d8fbac77bd4ef3369a16b768d9 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Wed, 16 Jul 2025 17:45:04 -0500
Subject: [PATCH 1/4] Exposed some AMDGCN raw buffer atomic intrinsics to
 clang.

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |  9 +++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   | 46 +++++++++++
 ...cn-raw-buffer-atomics-gfx908-target-err.cl | 16 ++++
 ...ns-amdgcn-raw-buffer-atomics-gfx90a-err.cl | 16 ++++
 ...iltins-amdgcn-raw-buffer-atomics-gfx90a.cl | 76 +++++++++++++++++++
 5 files changed, 163 insertions(+)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 313c0e640d240..58e26f0a2c458 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -252,6 +252,15 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst
 TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts")
 TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts")
 
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64, "ddQbiiIi", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts")
+
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts")
+
 TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts")
 TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts")
 TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index dcfdea648e93c..4ed6439c0d2d4 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -1439,6 +1439,52 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
         F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
             EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
   }
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: {
+    llvm::Type *RetTy;
+    switch (BuiltinID) {
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
+      RetTy = Int32Ty;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
+      RetTy = FloatTy;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
+      RetTy = DoubleTy;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16:
+      RetTy = llvm::FixedVectorType::get(HalfTy, 2);
+    }
+    unsigned IID;
+    switch (BuiltinID) {
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
+      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
+      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
+    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16:
+      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
+      break;
+    }
+    llvm::Function *F = CGM.getIntrinsic(IID, RetTy);
+    return Builder.CreateCall(
+        F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
+            EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)),
+            EmitScalarExpr(E->getArg(4))});
+  }
   case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
     return emitBuiltinWithOneOverloadedType<2>(
         *this, E, Intrinsic::amdgcn_s_prefetch_data);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl
new file mode 100644
index 0000000000000..3e72ef98de2c1
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -verify=gfx908,expected -o - %s
+
+// REQUIRES: amdgpu-registered-target
+
+typedef half __attribute__((ext_vector_type(2))) float16x2_t;
+
+void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset) {
+  i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' needs target feature gfx90a-insts}}
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' needs target feature gfx90a-insts}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' needs target feature gfx90a-insts}}
+  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' needs target feature gfx90a-insts}}
+
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature gfx90a-insts}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature gfx90a-insts}}
+  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' needs target feature gfx90a-insts}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl
new file mode 100644
index 0000000000000..b32c9cc620896
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s
+
+// REQUIRES: amdgpu-registered-target
+
+typedef half __attribute__((ext_vector_type(2))) float16x2_t;
+
+void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset, int x) {
+  i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' must be a constant integer}}
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' must be a constant integer}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' must be a constant integer}}
+  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' must be a constant integer}}
+
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}}
+  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' must be a constant integer}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl
new file mode 100644
index 0000000000000..f13d4f3459d58
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl
@@ -0,0 +1,76 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
+
+// REQUIRES: amdgpu-registered-target
+
+typedef half __attribute__((ext_vector_type(2))) float16x2_t;
+
+// CHECK-LABEL: define dso_local i32 @test_atomic_add_i32(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], i32 noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local float @test_atomic_fadd_f32(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local double @test_atomic_fadd_f64(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret double [[TMP0]]
+//
+double test_atomic_fadd_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fadd_v2f16(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret <2 x half> [[TMP0]]
+//
+float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local float @test_atomic_fmax_f32(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local double @test_atomic_fmax_f64(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret double [[TMP0]]
+//
+double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fmax_v2f16(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret <2 x half> [[TMP0]]
+//
+float16x2_t test_atomic_fmax_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(x, rsrc, offset, soffset, 0);
+}

>From 59e1b303486a6c051deeeec6a91773bdfb82aa3e Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Thu, 17 Jul 2025 12:29:01 -0500
Subject: [PATCH 2/4] Refined choice of target features.

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  | 16 ++--
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   |  9 +--
 ...iltins-amdgcn-raw-buffer-atomic-add-err.cl | 10 +++
 ...amdgcn-raw-buffer-atomic-add-target-err.cl |  9 +++
 .../builtins-amdgcn-raw-buffer-atomic-add.cl  | 36 +++++++++
 ...iltins-amdgcn-raw-buffer-atomic-max-err.cl |  7 ++
 ...amdgcn-raw-buffer-atomic-max-target-err.cl |  7 ++
 .../builtins-amdgcn-raw-buffer-atomic-max.cl  | 24 ++++++
 ...cn-raw-buffer-atomics-gfx908-target-err.cl | 16 ----
 ...ns-amdgcn-raw-buffer-atomics-gfx90a-err.cl | 16 ----
 ...iltins-amdgcn-raw-buffer-atomics-gfx90a.cl | 76 -------------------
 11 files changed, 102 insertions(+), 124 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
 delete mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl
 delete mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl
 delete mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 58e26f0a2c458..9b1e618a490a5 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -163,6 +163,13 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
 BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
 BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
 
+BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "atomic-fadd-rtn-insts")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "atomic-buffer-global-pk-add-f16-insts")
+
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32")
+TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64")
+
 TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
 TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
 
@@ -252,15 +259,6 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst
 TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts")
 TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts")
 
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64, "ddQbiiIi", "t", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts")
-
 TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts")
 TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts")
 TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 4ed6439c0d2d4..0c9b8fafc8cd2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -1441,11 +1441,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   }
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
-  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: {
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: {
     llvm::Type *RetTy;
     switch (BuiltinID) {
     case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
@@ -1455,13 +1453,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
       RetTy = FloatTy;
       break;
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
     case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
       RetTy = DoubleTy;
       break;
     case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16:
       RetTy = llvm::FixedVectorType::get(HalfTy, 2);
+      break;
     }
     unsigned IID;
     switch (BuiltinID) {
@@ -1469,13 +1466,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
       break;
     case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
     case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
       break;
     case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
     case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16:
       IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
       break;
     }
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl
new file mode 100644
index 0000000000000..5c5ae937797e8
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef half __attribute__((ext_vector_type(2))) float16x2_t;
+
+void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset, int x) {
+  i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' must be a constant integer}}
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' must be a constant integer}}
+  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' must be a constant integer}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl
new file mode 100644
index 0000000000000..509a498af5efa
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -verify=gfx908,expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef half __attribute__((ext_vector_type(2))) float16x2_t;
+
+void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, float16x2_t v2f16, int offset, int soffset) {
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts}}
+  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl
new file mode 100644
index 0000000000000..7641e10c89c04
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl
@@ -0,0 +1,36 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
+
+// REQUIRES: amdgpu-registered-target
+
+typedef half __attribute__((ext_vector_type(2))) float16x2_t;
+
+// CHECK-LABEL: define dso_local i32 @test_atomic_add_i32(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], i32 noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local float @test_atomic_fadd_f32(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fadd_v2f16(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret <2 x half> [[TMP0]]
+//
+float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
new file mode 100644
index 0000000000000..aeb7426177d6d
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset, int x) {
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl
new file mode 100644
index 0000000000000..3f1ad416d132f
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset) {
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature atomic-fmin-fmax-global-f32}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature atomic-fmin-fmax-global-f64}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
new file mode 100644
index 0000000000000..d2a9314906a65
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
@@ -0,0 +1,24 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
+
+// REQUIRES: amdgpu-registered-target
+
+// CHECK-LABEL: define dso_local float @test_atomic_fmax_f32(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CHECK-LABEL: define dso_local double @test_atomic_fmax_f64(
+// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
+// CHECK-NEXT:    ret double [[TMP0]]
+//
+double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl
deleted file mode 100644
index 3e72ef98de2c1..0000000000000
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -verify=gfx908,expected -o - %s
-
-// REQUIRES: amdgpu-registered-target
-
-typedef half __attribute__((ext_vector_type(2))) float16x2_t;
-
-void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset) {
-  i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' needs target feature gfx90a-insts}}
-  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' needs target feature gfx90a-insts}}
-  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' needs target feature gfx90a-insts}}
-  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' needs target feature gfx90a-insts}}
-
-  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature gfx90a-insts}}
-  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature gfx90a-insts}}
-  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' needs target feature gfx90a-insts}}
-}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl
deleted file mode 100644
index b32c9cc620896..0000000000000
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s
-
-// REQUIRES: amdgpu-registered-target
-
-typedef half __attribute__((ext_vector_type(2))) float16x2_t;
-
-void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset, int x) {
-  i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' must be a constant integer}}
-  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' must be a constant integer}}
-  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' must be a constant integer}}
-  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' must be a constant integer}}
-
-  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}}
-  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}}
-  v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' must be a constant integer}}
-}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl
deleted file mode 100644
index f13d4f3459d58..0000000000000
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a.cl
+++ /dev/null
@@ -1,76 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
-
-// REQUIRES: amdgpu-registered-target
-
-typedef half __attribute__((ext_vector_type(2))) float16x2_t;
-
-// CHECK-LABEL: define dso_local i32 @test_atomic_add_i32(
-// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], i32 noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
-// CHECK-NEXT:    ret i32 [[TMP0]]
-//
-int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) {
-  return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0);
-}
-
-// CHECK-LABEL: define dso_local float @test_atomic_fadd_f32(
-// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
-// CHECK-NEXT:    ret float [[TMP0]]
-//
-float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
-  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0);
-}
-
-// CHECK-LABEL: define dso_local double @test_atomic_fadd_f64(
-// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
-// CHECK-NEXT:    ret double [[TMP0]]
-//
-double test_atomic_fadd_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
-  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(x, rsrc, offset, soffset, 0);
-}
-
-// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fadd_v2f16(
-// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
-// CHECK-NEXT:    ret <2 x half> [[TMP0]]
-//
-float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
-  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0);
-}
-
-// CHECK-LABEL: define dso_local float @test_atomic_fmax_f32(
-// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
-// CHECK-NEXT:    ret float [[TMP0]]
-//
-float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
-  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0);
-}
-
-// CHECK-LABEL: define dso_local double @test_atomic_fmax_f64(
-// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
-// CHECK-NEXT:    ret double [[TMP0]]
-//
-double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
-  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0);
-}
-
-// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fmax_v2f16(
-// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
-// CHECK-NEXT:    ret <2 x half> [[TMP0]]
-//
-float16x2_t test_atomic_fmax_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
-  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(x, rsrc, offset, soffset, 0);
-}

>From 16fcd493d898399e484f000fa9682555410a4edd Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Fri, 18 Jul 2025 11:56:35 -0500
Subject: [PATCH 3/4] Addressed reviewer feedback.

---
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   | 44 +++----------------
 .../builtins-amdgcn-raw-buffer-atomic-add.cl  |  2 +
 ...iltins-amdgcn-raw-buffer-atomic-max-err.cl |  7 ---
 .../builtins-amdgcn-raw-buffer-atomic-max.cl  |  5 ++-
 ...iltins-amdgcn-raw-buffer-atomic-add-err.cl |  0
 ...amdgcn-raw-buffer-atomic-add-target-err.cl |  0
 ...iltins-amdgcn-raw-buffer-atomic-max-err.cl |  9 ++++
 ...amdgcn-raw-buffer-atomic-max-target-err.cl |  0
 8 files changed, 21 insertions(+), 46 deletions(-)
 delete mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
 rename clang/test/{CodeGenOpenCL => SemaOpenCL}/builtins-amdgcn-raw-buffer-atomic-add-err.cl (100%)
 rename clang/test/{CodeGenOpenCL => SemaOpenCL}/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl (100%)
 create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
 rename clang/test/{CodeGenOpenCL => SemaOpenCL}/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl (100%)

diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 0c9b8fafc8cd2..0374d858160ce 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -1440,46 +1440,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
             EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
   }
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
+    return emitBuiltinWithOneOverloadedType<5>(
+        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_add);
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
+    return emitBuiltinWithOneOverloadedType<5>(
+        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd);
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
-  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: {
-    llvm::Type *RetTy;
-    switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
-      RetTy = Int32Ty;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
-      RetTy = FloatTy;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
-      RetTy = DoubleTy;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
-      RetTy = llvm::FixedVectorType::get(HalfTy, 2);
-      break;
-    }
-    unsigned IID;
-    switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
-      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
-      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
-    case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
-      IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
-      break;
-    }
-    llvm::Function *F = CGM.getIntrinsic(IID, RetTy);
-    return Builder.CreateCall(
-        F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
-            EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)),
-            EmitScalarExpr(E->getArg(4))});
-  }
+  case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
+    return emitBuiltinWithOneOverloadedType<5>(
+        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax);
   case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
     return emitBuiltinWithOneOverloadedType<2>(
         *this, E, Intrinsic::amdgcn_s_prefetch_data);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl
index 7641e10c89c04..689046ace4479 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add.cl
@@ -1,5 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s
 
 // REQUIRES: amdgpu-registered-target
 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
deleted file mode 100644
index aeb7426177d6d..0000000000000
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
+++ /dev/null
@@ -1,7 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s
-// REQUIRES: amdgpu-registered-target
-
-void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset, int x) {
-  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}}
-  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}}
-}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
index d2a9314906a65..2b88ca2e3a4e6 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
@@ -1,6 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
-
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
 // REQUIRES: amdgpu-registered-target
 
 // CHECK-LABEL: define dso_local float @test_atomic_fmax_f32(
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl
similarity index 100%
rename from clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl
rename to clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-add-err.cl
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl
similarity index 100%
rename from clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl
rename to clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-add-target-err.cl
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
new file mode 100644
index 0000000000000..4ad4eb50dbbd4
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-err.cl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset, int x) {
+  f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}}
+  f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl
similarity index 100%
rename from clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl
rename to clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-atomic-max-target-err.cl

>From 877c00182080363ac114b24a0416f768be232cd5 Mon Sep 17 00:00:00 2001
From: Zach Goldthorpe <Zach.Goldthorpe at amd.com>
Date: Fri, 1 Aug 2025 13:10:29 -0500
Subject: [PATCH 4/4] Added `atomic-fmin-fmax-global-f*` features to
 TargetParser.

---
 clang/test/CodeGen/link-builtin-bitcode.c     |  8 +--
 clang/test/CodeGenOpenCL/amdgpu-features.cl   | 54 +++++++++----------
 .../builtins-amdgcn-raw-buffer-atomic-max.cl  |  6 +--
 llvm/lib/TargetParser/TargetParser.cpp        | 10 ++++
 4 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c
index 963a3956ff808..9a5b6de3c3b38 100644
--- a/clang/test/CodeGen/link-builtin-bitcode.c
+++ b/clang/test/CodeGen/link-builtin-bitcode.c
@@ -43,7 +43,7 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
 // CHECK-LABEL: @attr_incompatible
 // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
 
-// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
+// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 42768ac8def1f..690471e4d1083 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -82,34 +82,34 @@
 // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
-// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
+// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1011: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1013: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1030: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1034: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1036: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
 
-// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
+// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
 
 kernel void test() {}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
index 2b88ca2e3a4e6..5cfb099bf8acc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomic-max.cl
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx10-1-generic -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx10-3-generic -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s
 // REQUIRES: amdgpu-registered-target
 
 // CHECK-LABEL: define dso_local float @test_atomic_fmax_f32(
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index d7e206ef8cd4f..a13e4f8695459 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -456,6 +456,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["atomic-global-pk-add-bf16-inst"] = true;
       Features["atomic-ds-pk-add-16-insts"] = true;
       Features["setprio-inc-wg-inst"] = true;
+      Features["atomic-fmin-fmax-global-f32"] = true;
+      Features["atomic-fmin-fmax-global-f64"] = true;
       break;
     case GK_GFX1201:
     case GK_GFX1200:
@@ -484,6 +486,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["atomic-fadd-rtn-insts"] = true;
       Features["image-insts"] = true;
       Features["fp8-conversion-insts"] = true;
+      Features["atomic-fmin-fmax-global-f32"] = true;
       break;
     case GK_GFX1153:
     case GK_GFX1152:
@@ -512,6 +515,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["atomic-fadd-rtn-insts"] = true;
       Features["image-insts"] = true;
       Features["gws"] = true;
+      Features["atomic-fmin-fmax-global-f32"] = true;
       break;
     case GK_GFX1036:
     case GK_GFX1035:
@@ -540,6 +544,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["s-memtime-inst"] = true;
       Features["gws"] = true;
       Features["vmem-to-lds-load-insts"] = true;
+      Features["atomic-fmin-fmax-global-f32"] = true;
+      Features["atomic-fmin-fmax-global-f64"] = true;
       break;
     case GK_GFX1012:
     case GK_GFX1011:
@@ -565,6 +571,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["s-memtime-inst"] = true;
       Features["gws"] = true;
       Features["vmem-to-lds-load-insts"] = true;
+      Features["atomic-fmin-fmax-global-f32"] = true;
+      Features["atomic-fmin-fmax-global-f64"] = true;
       break;
     case GK_GFX950:
       Features["bitop3-insts"] = true;
@@ -616,11 +624,13 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["s-memtime-inst"] = true;
       Features["gws"] = true;
       Features["vmem-to-lds-load-insts"] = true;
+      Features["atomic-fmin-fmax-global-f64"] = true;
       break;
     case GK_GFX90A:
       Features["gfx90a-insts"] = true;
       Features["atomic-buffer-global-pk-add-f16-insts"] = true;
       Features["atomic-fadd-rtn-insts"] = true;
+      Features["atomic-fmin-fmax-global-f64"] = true;
       [[fallthrough]];
     case GK_GFX908:
       Features["dot3-insts"] = true;



More information about the llvm-commits mailing list