[llvm] 35c2a75 - AMDGPU: Fix asserting on fast f16 pown
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 25 16:58:52 PDT 2023
Author: Matt Arsenault
Date: 2023-08-25T19:56:20-04:00
New Revision: 35c2a7542c5cba817e48ec2e1e995c285788395c
URL: https://github.com/llvm/llvm-project/commit/35c2a7542c5cba817e48ec2e1e995c285788395c
DIFF: https://github.com/llvm/llvm-project/commit/35c2a7542c5cba817e48ec2e1e995c285788395c.diff
LOG: AMDGPU: Fix asserting on fast f16 pown
https://reviews.llvm.org/D158903
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 3d72b834399558..c0295d76675ad5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1021,14 +1021,14 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
if (needcopysign) {
Value *opr_n;
Type* rTy = opr0->getType();
- Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
+ Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
Type *nTy = nTyS;
if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
nTy = FixedVectorType::get(nTyS, vTy);
unsigned size = nTy->getScalarSizeInBits();
opr_n = FPOp->getOperand(1);
if (opr_n->getType()->isIntegerTy())
- opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
+ opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
else
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 4468f8ec90faa3..fd7ac5294adc1b 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -347,6 +347,27 @@ entry:
ret void
}
+declare half @_Z4pownDhi(half, i32)
+
+; GCN-LABEL: {{^}}define half @test_pown_f16(
+; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
+; GCN-NATIVE: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)
+; GCN-NATIVE: %pownI2F = sitofp i32 %y to half
+; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F
+; GCN-NATIVE: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)
+; GCN-NATIVE: %__ytou = trunc i32 %y to i16
+; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
+; GCN-NATIVE: %0 = bitcast half %x to i16
+; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
+; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
+; GCN-NATIVE: %2 = or i16 %__pow_sign, %1
+; GCN-NATIVE: %3 = bitcast i16 %2 to half
+define half @test_pown_f16(half %x, i32 %y) {
+entry:
+ %call = call fast half @_Z4pownDhi(half %x, i32 %y)
+ ret half %call
+}
+
declare float @_Z4pownfi(float, i32)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
@@ -791,6 +812,6 @@ entry:
; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]]
; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]
-; GCN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
-; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) }
+; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind }
+; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) }
attributes #0 = { nounwind }
More information about the llvm-commits
mailing list