[clang] [AMDGPU] Lower __builtin_amdgcn_read_exec_hi to use amdgcn_ballot (PR #69567)

Rana Pratap Reddy via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 24 00:34:00 PDT 2023


https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/69567

>From c2ed656fa149a9cb60fb43eb34f4b20186166b34 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 22 ++++++++++++-----
 .../CodeGenOpenCL/builtins-amdgcn-wave32.cl   | 24 +++++++++++++++++++
 .../CodeGenOpenCL/builtins-amdgcn-wave64.cl   | 23 ++++++++++++++++++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   |  4 +++-
 4 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..02ed6b0c6e56673 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7995,15 +7995,24 @@ enum SpecialRegisterAccessKind {
   Write,
 };
 
+// Generated the IR for __builtin_read_exec_*.
+// Lowers the builtin to amdgcn_ballot intrinsic.
 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
                                       llvm::Type *RegisterType,
-                                      llvm::Type *ValueType) {
+                                      llvm::Type *ValueType, bool isExecHi) {
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
   CodeGen::CodeGenModule &CGM = CGF.CGM;
 
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
   llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+  if (isExecHi) {
+    Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+    Value *Rt2 = Builder.CreateLShr(Call, C1);
+    Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
+    return Rt2;
+  }
+
   return Call;
 }
 
@@ -17857,10 +17866,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
   case AMDGPU::BI__builtin_amdgcn_read_exec:
+    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
-  }
+    return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
index a4d14cf1f6cf0bd..43553131f63c549 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave32_target_attr(
 // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
 __attribute__((target("wavefrontsize32")))
@@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global uint* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 32
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
index 563c9a2a240c1dc..53f34c6a44ae7dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_ballot_wave64_target_attr(
 // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
 __attribute__((target("wavefrontsize64")))
@@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b)
   *out = __builtin_amdgcn_ballot_w64(a == b);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+void test_read_exec(global ulong* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+void test_read_exec_lo(global ulong* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+void test_read_exec_hi(global ulong* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 #if __AMDGCN_WAVEFRONT_SIZE != 64
 #error Wrong wavesize detected
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8938642e3b19f8c..0bc9a54682d3e31 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
 // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
 
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }



More information about the cfe-commits mailing list