[clang] d1fb930 - Revert "[AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (#71139)"
Mitch Phillips via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 8 03:51:25 PST 2023
Author: Mitch Phillips
Date: 2023-11-08T12:50:53+01:00
New Revision: d1fb9307951319eea3e869d78470341d603c8363
URL: https://github.com/llvm/llvm-project/commit/d1fb9307951319eea3e869d78470341d603c8363
DIFF: https://github.com/llvm/llvm-project/commit/d1fb9307951319eea3e869d78470341d603c8363.diff
LOG: Revert "[AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (#71139)"
This reverts commit 32a3f2afe6ea7ffb02a6a188b123ded6f4c89f6c.
Reason: Broke the sanitizer buildbots. More details at
https://github.com/llvm/llvm-project/commit/32a3f2afe6ea7ffb02a6a188b123ded6f4c89f6c
Added:
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
Removed:
clang/test/CodeGenHIP/dpp-const-fold.hip
################################################################################
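For context, the hunks below re-inline one and the same decision into each per-target builtin emitter: bit i of the ICEArguments mask marks builtin argument i as an integer constant expression, which is folded so the generated intrinsic receives a ConstantInt, while every other argument is emitted with EmitScalarExpr. A minimal standalone sketch of that mask check, with illustrative names only (not Clang code):

// Standalone sketch (illustrative names, not Clang code): models the
// ICEArguments mask check that the hunks below inline back into each
// per-target builtin emitter.
#include <cstdio>

static bool mustBeConstant(unsigned ICEArguments, unsigned Idx) {
  // Bit Idx set => builtin argument Idx is required to be an integer
  // constant expression and gets folded to a ConstantInt; otherwise the
  // argument is emitted as an ordinary scalar.
  return (ICEArguments & (1u << Idx)) != 0;
}

int main() {
  // Hypothetical mask for a DPP-style builtin whose arguments 2..5 are
  // immediates (dpp_ctrl, row_mask, bank_mask, bound_ctrl).
  const unsigned ICEArguments = 0x3C;
  for (unsigned I = 0; I != 6; ++I)
    std::printf("arg %u -> %s\n", I,
                mustBeConstant(ICEArguments, I) ? "fold to ConstantInt"
                                                : "EmitScalarExpr");
  return 0;
}

In the AMDGPU mov_dpp/update_dpp case the revert drops the mask check entirely and emits every argument with EmitScalarExpr, which is why the dpp-const-fold.hip test is removed along with it.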
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e7e498e8a933131..5ab81cc605819c3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5708,7 +5708,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType *FTy = F->getFunctionType();
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
- Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
+ Value *ArgValue;
+ // If this is a normal argument, just emit it as a scalar.
+ if ((ICEArguments & (1 << i)) == 0) {
+ ArgValue = EmitScalarExpr(E->getArg(i));
+ } else {
+ // If this is required to be a constant, constant fold it so that we
+ // know that the generated intrinsic gets a ConstantInt.
+ ArgValue = llvm::ConstantInt::get(
+ getLLVMContext(),
+ *E->getArg(i)->getIntegerConstantExpr(getContext()));
+ }
+
// If the intrinsic arg type is different from the builtin arg type
// we need to do a bit cast.
llvm::Type *PTy = FTy->getParamType(i);
@@ -8588,7 +8599,15 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
}
- Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
+ if ((ICEArguments & (1 << i)) == 0) {
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ } else {
+ // If this is required to be a constant, constant fold it so that we know
+ // that the generated intrinsic gets a ConstantInt.
+ Ops.push_back(llvm::ConstantInt::get(
+ getLLVMContext(),
+ *E->getArg(i)->getIntegerConstantExpr(getContext())));
+ }
}
switch (BuiltinID) {
@@ -11075,7 +11094,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
continue;
}
}
- Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
+ if ((ICEArguments & (1 << i)) == 0) {
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ } else {
+ // If this is required to be a constant, constant fold it so that we know
+ // that the generated intrinsic gets a ConstantInt.
+ Ops.push_back(llvm::ConstantInt::get(
+ getLLVMContext(),
+ *E->getArg(i)->getIntegerConstantExpr(getContext())));
+ }
}
auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
@@ -13787,7 +13814,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
assert(Error == ASTContext::GE_None && "Should not codegen an error");
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
+ // If this is a normal argument, just emit it as a scalar.
+ if ((ICEArguments & (1 << i)) == 0) {
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ continue;
+ }
+
+ // If this is required to be a constant, constant fold it so that we know
+ // that the generated intrinsic gets a ConstantInt.
+ Ops.push_back(llvm::ConstantInt::get(
+ getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
}
// These exist so that the builtin that takes an immediate can be bounds
@@ -17552,23 +17588,6 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
}
-llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
- unsigned Idx,
- const CallExpr *E) {
- llvm::Value *Arg = nullptr;
- if ((ICEArguments & (1 << Idx)) == 0) {
- Arg = EmitScalarExpr(E->getArg(Idx));
- } else {
- // If this is required to be a constant, constant fold it so that we
- // know that the generated intrinsic gets a ConstantInt.
- std::optional<llvm::APSInt> Result =
- E->getArg(Idx)->getIntegerConstantExpr(getContext());
- assert(Result && "Expected argument to be a constant");
- Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
- }
- return Arg;
-}
-
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -17619,15 +17638,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector<llvm::Value *, 6> Args;
- // Find out if any arguments are required to be integer constant
- // expressions.
- unsigned ICEArguments = 0;
- ASTContext::GetBuiltinTypeError Error;
- getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
- assert(Error == ASTContext::GE_None && "Should not codegen an error");
- for (unsigned I = 0; I != E->getNumArgs(); ++I) {
- Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
- }
+ for (unsigned I = 0; I != E->getNumArgs(); ++I)
+ Args.push_back(EmitScalarExpr(E->getArg(I)));
assert(Args.size() == 5 || Args.size() == 6);
if (Args.size() == 5)
Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
@@ -20603,7 +20615,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
Ops.push_back(AggValue);
continue;
}
- Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
+
+ // If this is a normal argument, just emit it as a scalar.
+ if ((ICEArguments & (1 << i)) == 0) {
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ continue;
+ }
+
+ // If this is required to be a constant, constant fold it so that we know
+ // that the generated intrinsic gets a ConstantInt.
+ Ops.push_back(llvm::ConstantInt::get(
+ getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
}
Intrinsic::ID ID = Intrinsic::not_intrinsic;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index dc6773eb83f5ece..42f94c9b540191e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4327,8 +4327,6 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
- llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
- const CallExpr *E);
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
deleted file mode 100644
index 1d1d135fb06239a..000000000000000
--- a/clang/test/CodeGenHIP/dpp-const-fold.hip
+++ /dev/null
@@ -1,48 +0,0 @@
-// REQUIRES: amdgpu-registered-target
-
-// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only \
-// RUN: %s | FileCheck %s
-
-constexpr static int OpCtrl()
-{
- return 15 + 1;
-}
-
-constexpr static int RowMask()
-{
- return 3 + 1;
-}
-
-constexpr static int BankMask()
-{
- return 2 + 1;
-}
-
-constexpr static bool BountCtrl()
-{
- return true & false;
-}
-
-// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
-__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
-{
- *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
-}
-
-// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
-__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
-{
- *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
-}
-
-// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
-__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
-{
- *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
-}
-
-// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
-__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
-{
- *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
-}