[clang] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot (PR #69567)
Rana Pratap Reddy via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 19 00:26:01 PDT 2023
https://github.com/ranapratap55 created https://github.com/llvm/llvm-project/pull/69567
Currently, __builtin_read_exec_hi lowers to llvm.read_register; this patch lowers it to use amdgcn_ballot instead.
From 340e633da9e3ab10efc0c0d430b9546cd2f19cfe Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Thu, 19 Oct 2023 12:52:13 +0530
Subject: [PATCH] [AMDGPU] Lower __builtin_read_exec_hi to use amdgcn_ballot
---
clang/lib/CodeGen/CGBuiltin.cpp | 27 +++++++++++++++------
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 4 ++-
2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index db9f354fa8386d3..d60826f293f0c46 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7997,14 +7997,26 @@ enum SpecialRegisterAccessKind {
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
llvm::Type *RegisterType,
- llvm::Type *ValueType) {
+ llvm::Type *ValueType, bool isExecHi) {
CodeGen::CGBuilderTy &Builder = CGF.Builder;
CodeGen::CodeGenModule &CGM = CGF.CGM;
llvm::Type *ResultType = CGF.ConvertType(E->getType());
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
- llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
- return Call;
+ llvm::Value *Call;
+ Function *F;
+
+ if (isExecHi) {
+ F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
+ Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+ Value *C1 = llvm::ConstantInt::get(ValueType, 32);
+ Value *Rt2 = Builder.CreateLShr(Call, C1);
+ Rt2 = Builder.CreateTruncOrBitCast(Rt2, CGF.Int32Ty);
+ return Rt2;
+ } else {
+ F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+ Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+ return Call;
+ }
}
// Generates the IR for the read/write special register builtin,
@@ -17837,10 +17849,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
}
case AMDGPU::BI__builtin_amdgcn_read_exec:
+ return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
- case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
- return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty);
- }
+ return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+ case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+ return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8938642e3b19f8c..0bc9a54682d3e31 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) {
// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
// CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}
More information about the cfe-commits
mailing list