[llvm] 4682039 - AMDGPU: Don't assert on printf of half
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 10 11:14:32 PST 2023
Author: Matt Arsenault
Date: 2023-01-10T14:13:23-05:00
New Revision: 4682039db0728dee5747246c371870750ba6c267
URL: https://github.com/llvm/llvm-project/commit/4682039db0728dee5747246c371870750ba6c267
DIFF: https://github.com/llvm/llvm-project/commit/4682039db0728dee5747246c371870750ba6c267.diff
LOG: AMDGPU: Don't assert on printf of half
The comment says fields should be 4-byte aligned, so just pass through
after conversion to integer. The conformance test lacks any testing of
half.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
llvm/test/CodeGen/AMDGPU/opencl-printf.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index d5c76967a68f..1a28995f18cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -234,6 +234,13 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
ResType = VectorType::get(ResType, VecType->getElementCount());
Builder.SetInsertPoint(CI);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ if (ArgType->isFloatingPointTy()) {
+ Arg = Builder.CreateBitCast(
+ Arg,
+ IntegerType::getIntNTy(Ctx, ArgType->getPrimitiveSizeInBits()));
+ }
+
if (OpConvSpecifiers[ArgCount - 1] == 'x' ||
OpConvSpecifiers[ArgCount - 1] == 'X' ||
OpConvSpecifiers[ArgCount - 1] == 'u' ||
@@ -373,7 +380,6 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
"PrintBuffGep", Brnch);
Type *Int32Ty = Type::getInt32Ty(Ctx);
- Type *Int64Ty = Type::getInt64Ty(Ctx);
for (unsigned ArgCount = 1;
ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
ArgCount++) {
@@ -381,7 +387,6 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
Type *ArgType = Arg->getType();
SmallVector<Value *, 32> WhatToStore;
if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) {
- Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
if (OpConvSpecifiers[ArgCount - 1] == 'f') {
if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) {
APFloat Val(FpCons->getValueAPF());
@@ -389,16 +394,13 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
&Lost);
Arg = ConstantFP::get(Ctx, Val);
- IType = Int32Ty;
} else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) {
if (FpExt->getType()->isDoubleTy() &&
FpExt->getOperand(0)->getType()->isFloatTy()) {
Arg = FpExt->getOperand(0);
- IType = Int32Ty;
}
}
}
- Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch);
WhatToStore.push_back(Arg);
} else if (isa<PointerType>(ArgType)) {
if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf.ll
index a7e5289e527c..b3c0870c4f7b 100644
--- a/llvm/test/CodeGen/AMDGPU/opencl-printf.ll
+++ b/llvm/test/CodeGen/AMDGPU/opencl-printf.ll
@@ -32,7 +32,7 @@
@printed.str.float.0 = private addrspace(4) constant float 0.0, align 4
@printed.str.ptr.null = private addrspace(4) constant ptr null, align 4
@printed.str.ptr.undef = private addrspace(4) constant ptr undef, align 4
-@format.str.f = private unnamed_addr addrspace(4) constant [30 x i8] c"%f %f %f %f %f %f %f %f %f %f\00", align 1
+@format.str.f = private unnamed_addr addrspace(4) constant [33 x i8] c"%f %f %f %f %f %f %f %f %f %f %f\00", align 1
@format.str.p = private unnamed_addr addrspace(4) constant [15 x i8] c"%p %p %p %p %p\00", align 1
@format.str.d = private unnamed_addr addrspace(4) constant [30 x i8] c"%d %d %d %d %d %d %d %d %d %d\00", align 1
@@ -45,66 +45,65 @@
@format.str.u = private unnamed_addr addrspace(4) constant [30 x i8] c"%u %u %u %u %u %u %u %u %u %u\00", align 1
-; FIXME: Crashes on half
-define amdgpu_kernel void @format_str_f(float %f32.0, double %f64, float %f32.1, i16 %i16, i32 %i32, i64 %i64) {
+define amdgpu_kernel void @format_str_f(float %f32.0, double %f64, float %f32.1, i16 %i16, i32 %i32, i64 %i64, half %f16) {
; R600-LABEL: @format_str_f(
; R600-NEXT: [[FPEXT_F32_TO_F64:%.*]] = fpext float [[F32_1:%.*]] to double
-; R600-NEXT: [[CALL1:%.*]] = call i32 @printf(ptr addrspace(4) @format.str.f, float [[F32_0:%.*]], double [[F64:%.*]], double [[FPEXT_F32_TO_F64]], float 1.000000e+00, double 2.000000e+00, i16 [[I16:%.*]], i32 [[I32:%.*]], i64 [[I64:%.*]], <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x i32> <i32 8, i32 234>)
+; R600-NEXT: [[CALL1:%.*]] = call i32 @printf(ptr addrspace(4) @format.str.f, float [[F32_0:%.*]], double [[F64:%.*]], double [[FPEXT_F32_TO_F64]], float 1.000000e+00, double 2.000000e+00, i16 [[I16:%.*]], i32 [[I32:%.*]], i64 [[I64:%.*]], <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x i32> <i32 8, i32 234>, half [[F16:%.*]])
; R600-NEXT: ret void
;
; GCN-LABEL: @format_str_f(
; GCN-NEXT: [[FPEXT_F32_TO_F64:%.*]] = fpext float [[F32_1:%.*]] to double
; GCN-NEXT: [[TMP1:%.*]] = sext i16 [[I16:%.*]] to i32
-; GCN-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 60)
+; GCN-NEXT: [[TMP2:%.*]] = bitcast half [[F16:%.*]] to i16
+; GCN-NEXT: [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
+; GCN-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 64)
; GCN-NEXT: br label [[DOTSPLIT:%.*]]
; GCN: .split:
-; GCN-NEXT: [[TMP2:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
-; GCN-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
-; GCN: 3:
+; GCN-NEXT: [[TMP4:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
+; GCN-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
+; GCN: 5:
; GCN-NEXT: [[PRINTBUFFID:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 0
; GCN-NEXT: [[PRINTBUFFIDCAST:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFID]] to ptr addrspace(1)
; GCN-NEXT: store i32 1, ptr addrspace(1) [[PRINTBUFFIDCAST]], align 4
; GCN-NEXT: [[PRINTBUFFGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
-; GCN-NEXT: [[PRINTARGFP:%.*]] = bitcast float [[F32_0:%.*]] to i32
; GCN-NEXT: [[PRINTBUFFPTRCAST:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFGEP]] to ptr addrspace(1)
-; GCN-NEXT: store i32 [[PRINTARGFP]], ptr addrspace(1) [[PRINTBUFFPTRCAST]], align 4
+; GCN-NEXT: store float [[F32_0:%.*]], ptr addrspace(1) [[PRINTBUFFPTRCAST]], align 4
; GCN-NEXT: [[PRINTBUFFNEXTPTR:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFGEP]], i32 4
-; GCN-NEXT: [[PRINTARGFP1:%.*]] = bitcast double [[F64:%.*]] to i64
-; GCN-NEXT: [[PRINTBUFFPTRCAST2:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR]] to ptr addrspace(1)
-; GCN-NEXT: store i64 [[PRINTARGFP1]], ptr addrspace(1) [[PRINTBUFFPTRCAST2]], align 4
-; GCN-NEXT: [[PRINTBUFFNEXTPTR3:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 8
-; GCN-NEXT: [[PRINTARGFP4:%.*]] = bitcast float [[F32_1]] to i32
-; GCN-NEXT: [[PRINTBUFFPTRCAST5:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR3]] to ptr addrspace(1)
-; GCN-NEXT: store i32 [[PRINTARGFP4]], ptr addrspace(1) [[PRINTBUFFPTRCAST5]], align 4
-; GCN-NEXT: [[PRINTBUFFNEXTPTR6:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], i32 4
-; GCN-NEXT: [[PRINTARGFP7:%.*]] = bitcast float 1.000000e+00 to i32
-; GCN-NEXT: [[PRINTBUFFPTRCAST8:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR6]] to ptr addrspace(1)
-; GCN-NEXT: store i32 [[PRINTARGFP7]], ptr addrspace(1) [[PRINTBUFFPTRCAST8]], align 4
-; GCN-NEXT: [[PRINTBUFFNEXTPTR9:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 4
-; GCN-NEXT: [[PRINTARGFP10:%.*]] = bitcast float 2.000000e+00 to i32
-; GCN-NEXT: [[PRINTBUFFPTRCAST11:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR9]] to ptr addrspace(1)
-; GCN-NEXT: store i32 [[PRINTARGFP10]], ptr addrspace(1) [[PRINTBUFFPTRCAST11]], align 4
-; GCN-NEXT: [[PRINTBUFFNEXTPTR12:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR9]], i32 4
+; GCN-NEXT: [[PRINTBUFFPTRCAST1:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR]] to ptr addrspace(1)
+; GCN-NEXT: store double [[F64:%.*]], ptr addrspace(1) [[PRINTBUFFPTRCAST1]], align 8
+; GCN-NEXT: [[PRINTBUFFNEXTPTR2:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 8
+; GCN-NEXT: [[PRINTBUFFPTRCAST3:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR2]] to ptr addrspace(1)
+; GCN-NEXT: store float [[F32_1]], ptr addrspace(1) [[PRINTBUFFPTRCAST3]], align 4
+; GCN-NEXT: [[PRINTBUFFNEXTPTR4:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], i32 4
+; GCN-NEXT: [[PRINTBUFFPTRCAST5:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR4]] to ptr addrspace(1)
+; GCN-NEXT: store float 1.000000e+00, ptr addrspace(1) [[PRINTBUFFPTRCAST5]], align 4
+; GCN-NEXT: [[PRINTBUFFNEXTPTR6:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], i32 4
+; GCN-NEXT: [[PRINTBUFFPTRCAST7:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR6]] to ptr addrspace(1)
+; GCN-NEXT: store float 2.000000e+00, ptr addrspace(1) [[PRINTBUFFPTRCAST7]], align 4
+; GCN-NEXT: [[PRINTBUFFNEXTPTR8:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 4
+; GCN-NEXT: [[PRINTBUFFPTRCAST9:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR8]] to ptr addrspace(1)
+; GCN-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[PRINTBUFFPTRCAST9]], align 4
+; GCN-NEXT: [[PRINTBUFFNEXTPTR10:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], i32 4
+; GCN-NEXT: [[PRINTBUFFPTRCAST11:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR10]] to ptr addrspace(1)
+; GCN-NEXT: store i32 [[I32:%.*]], ptr addrspace(1) [[PRINTBUFFPTRCAST11]], align 4
+; GCN-NEXT: [[PRINTBUFFNEXTPTR12:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], i32 4
; GCN-NEXT: [[PRINTBUFFPTRCAST13:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR12]] to ptr addrspace(1)
-; GCN-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[PRINTBUFFPTRCAST13]], align 4
-; GCN-NEXT: [[PRINTBUFFNEXTPTR14:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR12]], i32 4
+; GCN-NEXT: store i64 [[I64:%.*]], ptr addrspace(1) [[PRINTBUFFPTRCAST13]], align 4
+; GCN-NEXT: [[PRINTBUFFNEXTPTR14:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR12]], i32 8
; GCN-NEXT: [[PRINTBUFFPTRCAST15:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR14]] to ptr addrspace(1)
-; GCN-NEXT: store i32 [[I32:%.*]], ptr addrspace(1) [[PRINTBUFFPTRCAST15]], align 4
-; GCN-NEXT: [[PRINTBUFFNEXTPTR16:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR14]], i32 4
+; GCN-NEXT: store <2 x float> <float 1.000000e+00, float 2.000000e+00>, ptr addrspace(1) [[PRINTBUFFPTRCAST15]], align 8
+; GCN-NEXT: [[PRINTBUFFNEXTPTR16:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR14]], i32 8
; GCN-NEXT: [[PRINTBUFFPTRCAST17:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR16]] to ptr addrspace(1)
-; GCN-NEXT: store i64 [[I64:%.*]], ptr addrspace(1) [[PRINTBUFFPTRCAST17]], align 4
+; GCN-NEXT: store <2 x i32> <i32 8, i32 234>, ptr addrspace(1) [[PRINTBUFFPTRCAST17]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR18:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR16]], i32 8
; GCN-NEXT: [[PRINTBUFFPTRCAST19:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR18]] to ptr addrspace(1)
-; GCN-NEXT: store <2 x float> <float 1.000000e+00, float 2.000000e+00>, ptr addrspace(1) [[PRINTBUFFPTRCAST19]], align 8
-; GCN-NEXT: [[PRINTBUFFNEXTPTR20:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR18]], i32 8
-; GCN-NEXT: [[PRINTBUFFPTRCAST21:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFNEXTPTR20]] to ptr addrspace(1)
-; GCN-NEXT: store <2 x i32> <i32 8, i32 234>, ptr addrspace(1) [[PRINTBUFFPTRCAST21]], align 8
-; GCN-NEXT: br label [[TMP4]]
-; GCN: 4:
+; GCN-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[PRINTBUFFPTRCAST19]], align 4
+; GCN-NEXT: br label [[TMP6]]
+; GCN: 6:
; GCN-NEXT: ret void
;
%fpext.f32.to.f64 = fpext float %f32.1 to double
- %call1 = call i32 @printf(ptr addrspace(4) @format.str.f, float %f32.0, double %f64, double %fpext.f32.to.f64, float 1.0, double 2.0, i16 %i16, i32 %i32, i64 %i64, <2 x float> <float 1.0, float 2.0>, <2 x i32> <i32 8, i32 234>)
+ %call1 = call i32 @printf(ptr addrspace(4) @format.str.f, float %f32.0, double %f64, double %fpext.f32.to.f64, float 1.0, double 2.0, i16 %i16, i32 %i32, i64 %i64, <2 x float> <float 1.0, float 2.0>, <2 x i32> <i32 8, i32 234>, half %f16)
ret void
}
More information about the llvm-commits
mailing list