[PATCH] D158993: AMDGPU: Fix assertion on half typed pow with constant exponents
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 28 07:57:30 PDT 2023
arsenm created this revision.
arsenm added reviewers: AMDGPU, rampitec, jmmartinez.
Herald added subscribers: foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
https://reviews.llvm.org/D158993
Files:
llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
Index: llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -434,6 +434,39 @@
ret void
}
+declare half @_Z3powDhDh(half, half)
+declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
+
+; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x)
+; GCN-PRELINK: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
+; GCN-PRELINK: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)
+; GCN-PRELINK: %__ylogx = fmul fast half %__log2, 0xH4A80
+; GCN-PRELINK: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)
+; GCN-PRELINK: %1 = bitcast half %x to i16
+; GCN-PRELINK: %__pow_sign = and i16 %1, -32768
+; GCN-PRELINK: %2 = bitcast half %__exp2 to i16
+; GCN-PRELINK: %3 = or i16 %__pow_sign, %2
+; GCN-PRELINK: %4 = bitcast i16 %3 to half
+define half @test_pow_fast_f16__y_13(half %x) {
+ %powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
+ ret half %powr
+}
+
+; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x)
+; GCN-PRELINK: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
+; GCN-PRELINK: %__log2 = tail call fast <2 x half> @_Z4log2Dv2_Dh(<2 x half> %__fabs)
+; GCN-PRELINK: %__ylogx = fmul fast <2 x half> %__log2, <half 0xH4A80, half 0xH4A80>
+; GCN-PRELINK: %__exp2 = tail call fast <2 x half> @_Z4exp2Dv2_Dh(<2 x half> %__ylogx)
+; GCN-PRELINK: %1 = bitcast <2 x half> %x to <2 x i16>
+; GCN-PRELINK: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
+; GCN-PRELINK: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
+; GCN-PRELINK: %3 = or <2 x i16> %__pow_sign, %2
+; GCN-PRELINK: %4 = bitcast <2 x i16> %3 to <2 x half>
+define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
+ %powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
+ ret <2 x half> %powr
+}
+
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: store float %tmp, ptr addrspace(1) %a, align 4
Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -950,9 +950,7 @@
SmallVector<double, 0> DVal;
for (int i=0; i < getVecSize(FInfo); ++i) {
- double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
- ? (double)CDV->getElementAsFloat(i)
- : CDV->getElementAsDouble(i);
+ double V = CDV->getElementAsAPFloat(i).convertToDouble();
if (V < 0.0) needcopysign = true;
V = log2(std::abs(V));
DVal.push_back(V);
@@ -986,9 +984,7 @@
} else {
if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
for (int i=0; i < getVecSize(FInfo); ++i) {
- double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
- ? (double)CDV->getElementAsFloat(i)
- : CDV->getElementAsDouble(i);
+ double y = CDV->getElementAsAPFloat(i).convertToDouble();
if (y != (double)(int64_t)y)
return false;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158993.553936.patch
Type: text/x-patch
Size: 3292 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230828/dcbf5401/attachment.bin>
More information about the llvm-commits mailing list