[PATCH] D158993: AMDGPU: Fix assertion on half-typed pow with constant exponents

Mon Aug 28 07:57:30 PDT 2023

arsenm created this revision.
arsenm added reviewers: AMDGPU, rampitec, jmmartinez.
Herald added subscribers: foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.

https://reviews.llvm.org/D158993

Files:
  llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
  llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll


Index: llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -434,6 +434,39 @@
   ret void
 }
 
+declare half @_Z3powDhDh(half, half)
+declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
+
+; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x)
+; GCN-PRELINK: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
+; GCN-PRELINK: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)
+; GCN-PRELINK: %__ylogx = fmul fast half %__log2, 0xH4A80
+; GCN-PRELINK: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)
+; GCN-PRELINK: %1 = bitcast half %x to i16
+; GCN-PRELINK: %__pow_sign = and i16 %1, -32768
+; GCN-PRELINK: %2 = bitcast half %__exp2 to i16
+; GCN-PRELINK: %3 = or i16 %__pow_sign, %2
+; GCN-PRELINK: %4 = bitcast i16 %3 to half
+define half @test_pow_fast_f16__y_13(half %x) {
+  %powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
+  ret half %powr
+}
+
+; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x)
+; GCN-PRELINK: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
+; GCN-PRELINK: %__log2 = tail call fast <2 x half> @_Z4log2Dv2_Dh(<2 x half> %__fabs)
+; GCN-PRELINK: %__ylogx = fmul fast <2 x half> %__log2, <half 0xH4A80, half 0xH4A80>
+; GCN-PRELINK: %__exp2 = tail call fast <2 x half> @_Z4exp2Dv2_Dh(<2 x half> %__ylogx)
+; GCN-PRELINK: %1 = bitcast <2 x half> %x to <2 x i16>
+; GCN-PRELINK: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
+; GCN-PRELINK: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
+; GCN-PRELINK: %3 = or <2 x i16> %__pow_sign, %2
+; GCN-PRELINK: %4 = bitcast <2 x i16> %3 to <2 x half>
+define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
+  %powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
+  ret <2 x half> %powr
+}
+
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
 ; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
 ; GCN: store float %tmp, ptr addrspace(1) %a, align 4
Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -950,9 +950,7 @@
 
       SmallVector<double, 0> DVal;
       for (int i=0; i < getVecSize(FInfo); ++i) {
-        double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
-                     ? (double)CDV->getElementAsFloat(i)
-                     : CDV->getElementAsDouble(i);
+        double V = CDV->getElementAsAPFloat(i).convertToDouble();
         if (V < 0.0) needcopysign = true;
         V = log2(std::abs(V));
         DVal.push_back(V);
@@ -986,9 +984,7 @@
     } else {
       if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
         for (int i=0; i < getVecSize(FInfo); ++i) {
-          double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
-                     ? (double)CDV->getElementAsFloat(i)
-                     : CDV->getElementAsDouble(i);
+          double y = CDV->getElementAsAPFloat(i).convertToDouble();
           if (y != (double)(int64_t)y)
             return false;
         }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158993.553936.patch
Type: text/x-patch
Size: 3292 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230828/dcbf5401/attachment.bin>