[llvm] [AMDGPU] Implement IR expansion for frem instruction (PR #130988)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 12 18:19:36 PDT 2025
================
@@ -37,6 +41,377 @@ static cl::opt<unsigned>
cl::desc("fp convert instructions on integers with "
"more than <N> bits are expanded."));
+namespace {
+/// This class implements a precise expansion of the frem instruction.
+/// The generated code is based on the fmod implementation in the AMD device
+/// libs.
+class FRemExpander {
+ /// The IRBuilder to use for the expansion.
+ IRBuilder<> &B;
+
+ /// Floating point type of the return value and the arguments of the FRem
+ /// instructions that should be expanded.
+ Type *FremTy;
+
+ /// Floating point type to use for the computation. This may be
+ /// wider than the \p FremTy.
+ Type *ComputeFpTy;
+
+ /// Integer type that can hold floating point values of type \p FremTy.
+ Type *IntTy;
+
+ /// Integer type used to hold the exponents returned by frexp.
+ Type *ExTy;
+
+ /// How many bits of the quotient to compute per iteration of the
+ /// algorithm, stored as a value of type \p ExTy.
+ Value *Bits;
+
+ /// Constant 1 of type \p ExTy.
+ Value *One;
+
+ /// The sign bit for floating point values of type \p FremTy.
+ const unsigned long Signbit;
+
+public:
+ static std::optional<FRemExpander> create(IRBuilder<> &B, Type *Ty) {
+ if (Ty->is16bitFPTy())
+ return FRemExpander{B, Ty, 11, 0x8000, B.getFloatTy(), B.getInt16Ty()};
+ if (Ty->isFloatTy() || Ty->isHalfTy())
+ return FRemExpander{B, Ty, 12, 0x80000000L, Ty, B.getInt32Ty()};
+ if (Ty->isDoubleTy())
+ return FRemExpander{B, Ty, 26, 0x8000000000000000L, Ty, B.getInt64Ty()};
+
+ return std::nullopt;
+ }
+
+ /// Build the FRem expansion for the numerator \p X and the
+ /// denominator \p Y using the builder \p B. The type of X and Y
+ /// must match the type for which the class instance has been
+ /// created. The code will be generated at the insertion point of \p
+ /// B and the insertion point will be reset at exit.
+ Value *buildFRem(Value *X, Value *Y) const;
+
+private:
+ FRemExpander(IRBuilder<> &B, Type *FremTy, short Bits, unsigned long Signbit,
+ Type *ComputeFpTy, Type *IntTy)
+ : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), IntTy(IntTy),
+ ExTy(B.getInt32Ty()), Bits(ConstantInt::get(ExTy, Bits)),
+ One(ConstantInt::get(ExTy, 1)), Signbit(Signbit) {};
+
+ Value *createLdexp(Value *Base, Value *Exp, const Twine &Name) const {
+ return B.CreateIntrinsic(Intrinsic::ldexp, {ComputeFpTy, B.getInt32Ty()},
+ {Base, Exp}, {}, Name);
+ }
+
+ Value *createRcp(Value *V, const Twine &Name) const {
+ return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
+ }
+
+ // Helper function to build the UPDATE_AX code which is common to the
+ // loop body and the "final iteration".
+ Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
+ // Build:
+ // float q = BUILTIN_RINT_ComputeFpTy(ax * ayinv);
+ // ax = fnma(q, ay, ax);
+ // int clt = ax < 0.0f;
+ // float axp = ax + ay;
+ // ax = clt ? axp : ax;
+ Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
+ {}, "q");
+ Value *AxUpdate = B.CreateIntrinsic(Intrinsic::fma, {ComputeFpTy},
+ {B.CreateFNeg(Q), Ay, Ax}, {}, "ax");
----------------
arsenm wrote:
I would hope IRBuilder has a CreateFMA
https://github.com/llvm/llvm-project/pull/130988
More information about the llvm-commits
mailing list