[clang] Revert "Revert "[AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic"" (PR #71669)
Pravin Jagtap via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 8 20:24:28 PST 2023
https://github.com/pravinjagtap updated https://github.com/llvm/llvm-project/pull/71669
>From 63f870074df10d0e2624632ac2ab0cb0996b436c Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Wed, 8 Nov 2023 07:08:03 -0500
Subject: [PATCH 1/2] Revert "Revert "[AMDGPU] const-fold imm operands of
amdgcn_update_dpp intrinsic (#71139)""
This reverts commit d1fb9307951319eea3e869d78470341d603c8363 and fixes
the lit test clang/test/CodeGenHIP/dpp-const-fold.hip
---
clang/lib/CodeGen/CGBuiltin.cpp | 84 +++++++++---------------
clang/lib/CodeGen/CodeGenFunction.h | 2 +
clang/test/CodeGenHIP/dpp-const-fold.hip | 48 ++++++++++++++
3 files changed, 81 insertions(+), 53 deletions(-)
create mode 100644 clang/test/CodeGenHIP/dpp-const-fold.hip
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5ab81cc605819c3..e7e498e8a933131 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5708,18 +5708,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType *FTy = F->getFunctionType();
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
- Value *ArgValue;
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- ArgValue = EmitScalarExpr(E->getArg(i));
- } else {
- // If this is required to be a constant, constant fold it so that we
- // know that the generated intrinsic gets a ConstantInt.
- ArgValue = llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext()));
- }
-
+ Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
// If the intrinsic arg type is different from the builtin arg type
// we need to do a bit cast.
llvm::Type *PTy = FTy->getParamType(i);
@@ -8599,15 +8588,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
}
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- } else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
switch (BuiltinID) {
@@ -11094,15 +11075,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
continue;
}
}
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- } else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
@@ -13814,16 +13787,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
assert(Error == ASTContext::GE_None && "Should not codegen an error");
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- continue;
- }
-
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
// These exist so that the builtin that takes an immediate can be bounds
@@ -17588,6 +17552,23 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
}
+llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
+ unsigned Idx,
+ const CallExpr *E) {
+ llvm::Value *Arg = nullptr;
+ if ((ICEArguments & (1 << Idx)) == 0) {
+ Arg = EmitScalarExpr(E->getArg(Idx));
+ } else {
+ // If this is required to be a constant, constant fold it so that we
+ // know that the generated intrinsic gets a ConstantInt.
+ std::optional<llvm::APSInt> Result =
+ E->getArg(Idx)->getIntegerConstantExpr(getContext());
+ assert(Result && "Expected argument to be a constant");
+ Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+ }
+ return Arg;
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -17638,8 +17619,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector<llvm::Value *, 6> Args;
- for (unsigned I = 0; I != E->getNumArgs(); ++I)
- Args.push_back(EmitScalarExpr(E->getArg(I)));
+ // Find out if any arguments are required to be integer constant
+ // expressions.
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+ for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+ Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
+ }
assert(Args.size() == 5 || Args.size() == 6);
if (Args.size() == 5)
Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
@@ -20615,17 +20603,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
Ops.push_back(AggValue);
continue;
}
-
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- continue;
- }
-
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
Intrinsic::ID ID = Intrinsic::not_intrinsic;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 42f94c9b540191e..dc6773eb83f5ece 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4327,6 +4327,8 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
+ const CallExpr *E);
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
new file mode 100644
index 000000000000000..f311ede5f71e6b5
--- /dev/null
+++ b/clang/test/CodeGenHIP/dpp-const-fold.hip
@@ -0,0 +1,48 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only -nogpuinc -nogpulib\
+// RUN: %s | FileCheck %s
+
+constexpr static int OpCtrl()
+{
+ return 15 + 1;
+}
+
+constexpr static int RowMask()
+{
+ return 3 + 1;
+}
+
+constexpr static int BankMask()
+{
+ return 2 + 1;
+}
+
+constexpr static bool BountCtrl()
+{
+ return true & false;
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
+}
>From b8604423aa5bf95c3b1948922acb427a0a834684 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Wed, 8 Nov 2023 23:23:32 -0500
Subject: [PATCH 2/2] Switched test to clang_cc1 from clang driver
---
clang/test/CodeGenHIP/dpp-const-fold.hip | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
index f311ede5f71e6b5..f5a97c6b0e77ff3 100644
--- a/clang/test/CodeGenHIP/dpp-const-fold.hip
+++ b/clang/test/CodeGenHIP/dpp-const-fold.hip
@@ -1,7 +1,6 @@
// REQUIRES: amdgpu-registered-target
-
-// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only -nogpuinc -nogpulib\
-// RUN: %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \
+// RUN: -o - | FileCheck %s
constexpr static int OpCtrl()
{
More information about the cfe-commits
mailing list