[llvm] [AMDGPU] Constant folding of llvm.amdgcn.trig.preop (PR #98562)

Wed Jul 17 11:36:53 PDT 2024

================
@@ -1102,6 +1102,81 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
     break;
   }
+  case Intrinsic::amdgcn_trig_preop: {
+    // The intrinsic is declared with name mangling, but currently the
+    // instruction only exists for f64
+    if (!II.getType()->isDoubleTy())
+      break;
+
+    Value *Src = II.getArgOperand(0);
+    Value *Segment = II.getArgOperand(1);
+    if (isa<PoisonValue>(Src))
+      return IC.replaceInstUsesWith(II, Src);
+
+    if (isa<UndefValue>(Src)) {
+      auto *QNaN = ConstantFP::get(
+          II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics()));
+      return IC.replaceInstUsesWith(II, QNaN);
+    }
+
+    const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
+    if (!Csrc)
+      break;
+
+    if (II.isStrictFP())
+      break;
+
+    const APFloat &Fsrc = Csrc->getValueAPF();
+    if (Fsrc.isNaN()) {
+      // FIXME: We just need to make the nan quiet here, but that's unavailable
+      // on APFloat, only IEEEfloat
+      auto *Quieted = ConstantFP::get(
+          II.getType(), scalbn(Fsrc, 0, APFloat::rmNearestTiesToEven));
+      return IC.replaceInstUsesWith(II, Quieted);
+    }
+
+    const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
+    if (!Cseg)
+      break;
+
+    uint64_t Exponent = (Fsrc.bitcastToAPInt().getZExtValue() >> 52) & 0x7ff;
----------------
changpeng wrote:

Right!

https://github.com/llvm/llvm-project/pull/98562