[PATCH] D158903: AMDGPU: Fix asserting on fast f16 pown
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 25 15:41:29 PDT 2023
arsenm created this revision.
arsenm added reviewers: AMDGPU, rampitec, jmmartinez.
Herald added subscribers: foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
https://reviews.llvm.org/D158903
Files:
llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
Index: llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -347,6 +347,27 @@
ret void
}
+declare half @_Z4pownDhi(half, i32)
+
+; GCN-LABEL: {{^}}define half @test_pown_f16(
+; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
+; GCN-NATIVE: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)
+; GCN-NATIVE: %pownI2F = sitofp i32 %y to half
+; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F
+; GCN-NATIVE: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)
+; GCN-NATIVE: %__ytou = trunc i32 %y to i16
+; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
+; GCN-NATIVE: %0 = bitcast half %x to i16
+; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
+; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
+; GCN-NATIVE: %2 = or i16 %__pow_sign, %1
+; GCN-NATIVE: %3 = bitcast i16 %2 to half
+define half @test_pown_f16(half %x, i32 %y) {
+entry:
+ %call = call fast half @_Z4pownDhi(half %x, i32 %y)
+ ret half %call
+}
+
declare float @_Z4pownfi(float, i32)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
@@ -791,6 +812,6 @@
; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]]
; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]
-; GCN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
-; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) }
+; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind }
+; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) }
attributes #0 = { nounwind }
Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1023,14 +1023,14 @@
if (needcopysign) {
Value *opr_n;
Type* rTy = opr0->getType();
- Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
+ Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
Type *nTy = nTyS;
if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
nTy = FixedVectorType::get(nTyS, vTy);
unsigned size = nTy->getScalarSizeInBits();
opr_n = FPOp->getOperand(1);
if (opr_n->getType()->isIntegerTy())
- opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
+ opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
else
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158903.553653.patch
Type: text/x-patch
Size: 2634 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230825/81e5c64e/attachment.bin>
More information about the llvm-commits mailing list