[PATCH] D120150: Constant folding of llvm.amdgcn.trig.preop

Wed May 11 11:40:22 PDT 2022

Ravi updated this revision to Diff 428730.
Ravi added a comment.
Herald added subscribers: kosarev, jsilvanus, hsmhsm.
Herald added a project: All.

Addressed all the review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120150/new/

https://reviews.llvm.org/D120150

Files:
  llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
@@ -1,5 +1,7 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN:  opt -S -mtriple=amdgcn-- -mcpu=gfx900 -instcombine < %s | FileCheck -check-prefix=GCN %s
+; RUN:  opt -S -mtriple=amdgcn-- -mcpu=gfx1010 -instcombine < %s | FileCheck -check-prefix=GCN %s
 
 declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone
 
@@ -28,3 +30,11 @@
   store double %result, double addrspace(1)* %out, align 8
   ret void
 }
+
+define protected amdgpu_kernel void @trig_preop_constfold(double addrspace(1)* nocapture %0, double addrspace(1)* nocapture readnone %1, i32 %2){
+; GCN: store double 0x2F42371D2126E970, double addrspace(1)* %0, align 8
+; GCN-NEXT: ret void
+  %4 = tail call contract double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5)
+  store double %4, double addrspace(1)* %0, align 8
+  ret void
+}
Index: llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1005,6 +1005,67 @@
 
     break;
   }
+  case Intrinsic::amdgcn_trig_preop: {
+
+    const uint32_t TwoByPi[] = {
+        0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
+        0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
+        0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
+        0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
+        0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
+        0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
+        0x56033046};
+
+    Value *Src = II.getArgOperand(0);
+    Value *Segment = II.getArgOperand(1);
+    const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
+    const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
+
+    if (!(Csrc && Cseg))
+      break;
+
+    const APFloat &Fsrc = Csrc->getValueAPF();
+
+    const APInt &SegVal = Cseg->getUniqueInteger();
+    bool Ovflow;
+    unsigned Numbits = 32;
+    bool Signed = true;
+
+    APInt EClamp(Numbits, 1077, Signed);
+    APInt E = (Fsrc.bitcastToAPInt()).ashr(52);
+    E &= 0x7ff;
+    E = E.trunc(Numbits);
+    APInt Shift =
+        (E.sgt(EClamp) ? E.ssub_ov(EClamp, Ovflow) : APInt(Numbits, 0, Signed))
+            .sadd_ov(APInt(Numbits, 53, Signed).smul_ov(SegVal, Ovflow),
+                     Ovflow);
+    int32_t I = (Shift.ashr(5)).getSExtValue();
+    APInt Bshift = Shift & 0x1f;
+    Numbits = 64;
+    Signed = false;
+    APInt Thi = APInt(Numbits,
+                      (((uint64_t)TwoByPi[I] << 32) | (uint64_t)TwoByPi[I + 1]),
+                      Signed);
+    APInt Tlo = APInt(Numbits, ((uint64_t)TwoByPi[I + 2] << 32), Signed);
+
+    if (Bshift.sgt(0)) {
+      Numbits = 32;
+      Signed = true;
+      Thi = (Thi.shl(Bshift)) |
+            (Tlo.lshr(APInt(Numbits, 64, Signed).ssub_ov(Bshift, Ovflow)));
+    }
+
+    Thi = Thi.lshr(11);
+    APFloat Res = APFloat(Thi.roundToDouble());
+    int32_t Scale = -53 - Shift.getSExtValue();
+
+    if (E.sge(0x7b0))
+      Scale += 128;
+
+    Res = scalbn(Res, Scale, RoundingMode::NearestTiesToEven);
+    double Resd = Res.convertToDouble();
+    return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Resd));
+  }
   case Intrinsic::amdgcn_fmul_legacy: {
     Value *Op0 = II.getArgOperand(0);
     Value *Op1 = II.getArgOperand(1);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120150.428730.patch
Type: text/x-patch
Size: 3958 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220511/a355a0f3/attachment.bin>