[llvm] 80e5b46 - AMDGPU: Fix assertion on half typed pow with constant exponents

Mon Aug 28 10:54:57 PDT 2023

Author: Matt Arsenault
Date: 2023-08-28T13:54:49-04:00
New Revision: 80e5b46e4523491328114b95a2813b4d4d7d396b

URL: https://github.com/llvm/llvm-project/commit/80e5b46e4523491328114b95a2813b4d4d7d396b
DIFF: https://github.com/llvm/llvm-project/commit/80e5b46e4523491328114b95a2813b4d4d7d396b.diff

LOG: AMDGPU: Fix assertion on half typed pow with constant exponents

https://reviews.llvm.org/D158993

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
    llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index c0295d76675ad5..957a6dd386d365 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -950,9 +950,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
 
       SmallVector<double, 0> DVal;
       for (int i=0; i < getVecSize(FInfo); ++i) {
-        double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
-                     ? (double)CDV->getElementAsFloat(i)
-                     : CDV->getElementAsDouble(i);
+        double V = CDV->getElementAsAPFloat(i).convertToDouble();
         if (V < 0.0) needcopysign = true;
         V = log2(std::abs(V));
         DVal.push_back(V);
@@ -986,9 +984,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
     } else {
       if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
         for (int i=0; i < getVecSize(FInfo); ++i) {
-          double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
-                     ? (double)CDV->getElementAsFloat(i)
-                     : CDV->getElementAsDouble(i);
+          double y = CDV->getElementAsAPFloat(i).convertToDouble();
           if (y != (double)(int64_t)y)
             return false;
         }

diff  --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index fd7ac5294adc1b..2f4a7c5c9ea06a 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -434,6 +434,39 @@ entry:
   ret void
 }
 
+declare half @_Z3powDhDh(half, half)
+declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
+
+; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x)
+; GCN-PRELINK: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
+; GCN-PRELINK: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)
+; GCN-PRELINK: %__ylogx = fmul fast half %__log2, 0xH4A80
+; GCN-PRELINK: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)
+; GCN-PRELINK: %1 = bitcast half %x to i16
+; GCN-PRELINK: %__pow_sign = and i16 %1, -32768
+; GCN-PRELINK: %2 = bitcast half %__exp2 to i16
+; GCN-PRELINK: %3 = or i16 %__pow_sign, %2
+; GCN-PRELINK: %4 = bitcast i16 %3 to half
+define half @test_pow_fast_f16__y_13(half %x) {
+  %powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
+  ret half %powr
+}
+
+; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x)
+; GCN-PRELINK: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
+; GCN-PRELINK: %__log2 = tail call fast <2 x half> @_Z4log2Dv2_Dh(<2 x half> %__fabs)
+; GCN-PRELINK: %__ylogx = fmul fast <2 x half> %__log2, <half 0xH4A80, half 0xH4A80>
+; GCN-PRELINK: %__exp2 = tail call fast <2 x half> @_Z4exp2Dv2_Dh(<2 x half> %__ylogx)
+; GCN-PRELINK: %1 = bitcast <2 x half> %x to <2 x i16>
+; GCN-PRELINK: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
+; GCN-PRELINK: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
+; GCN-PRELINK: %3 = or <2 x i16> %__pow_sign, %2
+; GCN-PRELINK: %4 = bitcast <2 x i16> %3 to <2 x half>
+define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
+  %powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
+  ret <2 x half> %powr
+}
+
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
 ; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
 ; GCN: store float %tmp, ptr addrspace(1) %a, align 4