[clang] [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (PR #71139)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Nov 2 21:31:24 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Pravin Jagtap (pravinjagtap)
<details>
<summary>Changes</summary>
The immediate operands of `__builtin_amdgcn_update_dpp` must evaluate to constants to match the intrinsic's requirements.
---
Full diff: https://github.com/llvm/llvm-project/pull/71139.diff
2 Files Affected:
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+14-2)
- (added) clang/test/CodeGenHIP/dpp-const-fold.hip (+48)
``````````diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e047d31c012116f..a4049cbc79d303d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17632,8 +17632,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector<llvm::Value *, 6> Args;
- for (unsigned I = 0; I != E->getNumArgs(); ++I)
- Args.push_back(EmitScalarExpr(E->getArg(I)));
+ for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(I));
+ // Except first two input operands, all other are imm operands for dpp
+ // intrinsic.
+ if (llvm::is_contained(std::initializer_list<unsigned>{2, 3, 4, 5}, I)) {
+ // If this is required to be a constant, constant fold it so that we
+ // know that the generated intrinsic gets a ConstantInt.
+ std::optional<llvm::APSInt> Result =
+ E->getArg(I)->getIntegerConstantExpr(getContext());
+ assert(Result && "Expected argument to be a constant");
+ Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+ }
+ Args.push_back(Arg);
+ }
assert(Args.size() == 5 || Args.size() == 6);
if (Args.size() == 5)
Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
new file mode 100644
index 000000000000000..1d1d135fb06239a
--- /dev/null
+++ b/clang/test/CodeGenHIP/dpp-const-fold.hip
@@ -0,0 +1,48 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only \
+// RUN: %s | FileCheck %s
+
+constexpr static int OpCtrl()
+{
+ return 15 + 1;
+}
+
+constexpr static int RowMask()
+{
+ return 3 + 1;
+}
+
+constexpr static int BankMask()
+{
+ return 2 + 1;
+}
+
+constexpr static bool BountCtrl()
+{
+ return true & false;
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
+}
+
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
+__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
+{
+ *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/71139
More information about the cfe-commits
mailing list