[clang] [llvm] [AMDGPU] Extend __builtin_amdgcn_ds_bpermute argument types (PR #153501)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 10 21:00:29 PDT 2025
================
@@ -159,6 +159,242 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
llvm::MDNode::get(CGF.getLLVMContext(), {}));
return LD;
}
+
+// Emits LLVM IR to lower a generic AMDGCN ds_bpermute over arbitrary payload
+// types. Assumes DataLayout is accurate; index is coerced to i32; payload is
+// split/coerced to 32-bit words.
+llvm::Value *emitAMDGCNDsBpermute(clang::CodeGen::CodeGenFunction &CGF,
+ const clang::CallExpr *Call,
+ ReturnValueSlot Dest) {
+ auto &B = CGF.Builder;
+ auto &CGM = CGF.CGM;
+ const llvm::DataLayout &DL = CGM.getDataLayout();
+
+ llvm::Type *I8 = B.getInt8Ty();
+ llvm::Type *I32 = B.getInt32Ty();
+
+ auto C32 = [&](uint32_t V) { return llvm::ConstantInt::get(I32, V); };
+
+ // Size/bitwidth and coercion helpers for arbitrary first-class types.
+ auto GetBitWidth = [&](llvm::Type *Ty) -> unsigned {
+ return DL.getTypeSizeInBits(Ty).getFixedValue();
+ };
+
+ auto ToI32Index = [&](llvm::Value *IdxVal,
+ clang::QualType IdxQT) -> llvm::Value * {
+ (void)IdxQT;
+ llvm::Type *Ty = IdxVal->getType();
+ if (Ty->isIntegerTy())
+ return B.CreateZExtOrTrunc(IdxVal, I32);
+ if (Ty->isPointerTy()) {
+ unsigned PtrBits = DL.getPointerSizeInBits(Ty->getPointerAddressSpace());
+ return B.CreateZExtOrTrunc(B.CreatePtrToInt(IdxVal, B.getIntNTy(PtrBits)),
+ I32);
+ }
+ unsigned Bits = GetBitWidth(Ty);
+ return B.CreateZExtOrTrunc(B.CreateBitCast(IdxVal, B.getIntNTy(Bits)), I32);
+ };
+
+ auto CoercePayloadToI32 = [&](llvm::Value *Val,
----------------
arsenm wrote:
This continues to reinvent cast sequences that we already have in many places.
https://github.com/llvm/llvm-project/pull/153501
More information about the llvm-commits mailing list