[llvm] [AMDGPU] expand-fp: Change frem expansion criterion (PR #158285)

Tue Sep 16 02:45:58 PDT 2025

https://github.com/frederik-h updated https://github.com/llvm/llvm-project/pull/158285

>From 4845052677d3b2a08ff56302404317397ae19ed7 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Fri, 12 Sep 2025 06:52:05 -0400
Subject: [PATCH 1/3] [AMDGPU] expand-fp: Change frem expansion criterion

The existing condition for checking whether or not to expand an frem
instruction in the pass is not sufficiently precise.  Right now, it is
sufficient to ensure the correct working of the pass. But this is only
true in conjunction with the existing check for the
MaxLegalFpConvertBitWidth value which happens to exit early on targets
on which the frem condition is insufficient.

The correct working of the pass should not rely on this interaction.
The possibility of using the pass for handling further
expansions:(e.g. merging the very similar ExpandLargDivRem into it) is
also limited by this.

This patch changes the pass to expand frem for a target iff the
target's legalization action for the instruction with the scalar type
corresponding to the instruction type is LibCall but the libcall does
not exist. The legalization action for frem in the AMDGPU backend is
adjusted accordingly.
---
 llvm/lib/CodeGen/ExpandFp.cpp                 | 37 +++++++++++++------
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  2 +-
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 9cc6c6a706c58..6f4f049cc7f8e 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -979,14 +979,22 @@ static RTLIB::Libcall fremToLibcall(Type *Ty) {
   llvm_unreachable("Unknown floating point type");
 }
 
-/* Return true if, according to \p LibInfo, the target either directly
-   supports the frem instruction for the \p Ty, has a custom lowering,
-   or uses a libcall. */
-static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) {
-  if (!TLI.isOperationExpand(ISD::FREM, EVT::getEVT(Ty)))
-    return true;
-
-  return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
+/// Return true if the pass should expand a "frem" instruction of the
+/// given \p Ty for the target represented by \p TLI. Expansion
+/// should happen if the legalization for the scalar type uses a
+/// non-existing libcall. The scalar type is considered because it is
+/// easier to do so and it is highly unlikely that a vector type can
+/// be legalized without a libcall if the scalar type cannot.
+static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
+  Type *ScalarTy = Ty->getScalarType();
+  EVT VT = EVT::getEVT(ScalarTy);
+
+  TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
+  if (LA != TargetLowering::LegalizeAction::LibCall)
+    return false;
+
+  bool MissingLibcall = !TLI.getLibcallName(fremToLibcall(ScalarTy));
+  return MissingLibcall && FRemExpander::canExpandType(ScalarTy);
 }
 
 static bool runImpl(Function &F, const TargetLowering &TLI,
@@ -1000,8 +1008,8 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
   if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
     MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
 
-  if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
-    return false;
+  bool TargetSkipExpandLargeFp =
+      MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
 
   for (auto &I : instructions(F)) {
     switch (I.getOpcode()) {
@@ -1011,8 +1019,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
       if (Ty->isScalableTy())
         continue;
 
-      if (targetSupportsFrem(TLI, Ty) ||
-          !FRemExpander::canExpandType(Ty->getScalarType()))
+      if (!shouldExpandFremType(TLI, Ty))
         continue;
 
       Replace.push_back(&I);
@@ -1022,6 +1029,9 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::FPToUI:
     case Instruction::FPToSI: {
+      if (TargetSkipExpandLargeFp)
+        continue;
+
       // TODO: This pass doesn't handle scalable vectors.
       if (I.getOperand(0)->getType()->isScalableTy())
         continue;
@@ -1039,6 +1049,9 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::UIToFP:
     case Instruction::SIToFP: {
+      if (TargetSkipExpandLargeFp)
+        continue;
+
       // TODO: This pass doesn't handle scalable vectors.
       if (I.getOperand(0)->getType()->isScalableTy())
         continue;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5c9b616e9bc21..3892d7949a0fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -423,7 +423,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
                      Expand);
 
-  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand);
+  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, LibCall);
 
   if (Subtarget->has16BitInsts()) {
     setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);

>From 534b3e2f0787cfa2c13e0d8cf6356db12db27741 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Fri, 12 Sep 2025 09:21:27 -0400
Subject: [PATCH 2/3] Revert Operation Action for frem to Expand

---
 llvm/lib/CodeGen/ExpandFp.cpp                 | 3 ++-
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 6f4f049cc7f8e..fd376d54ad753 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -990,7 +990,8 @@ static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
   EVT VT = EVT::getEVT(ScalarTy);
 
   TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
-  if (LA != TargetLowering::LegalizeAction::LibCall)
+  if (LA != TargetLowering::LegalizeAction::LibCall &&
+      LA != TargetLowering::LegalizeAction::Expand)
     return false;
 
   bool MissingLibcall = !TLI.getLibcallName(fremToLibcall(ScalarTy));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3892d7949a0fc..5c9b616e9bc21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -423,7 +423,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
                      Expand);
 
-  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, LibCall);
+  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand);
 
   if (Subtarget->has16BitInsts()) {
     setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);

>From 61ca19c0ca9b9c42bd9bd5ddbb1be175d0403307 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Tue, 16 Sep 2025 05:41:22 -0400
Subject: [PATCH 3/3] [AMDGPU] expand-fp: Add early exit for targets that don't
 require any expansion

This commit adds a function to check if a target needs expansion
of any type of frem instructions. In conjunction with the trivial
check if the large fp conversion expansions are necessary, this
can be used to perform any early exit from the pass if no
expansions are needed for a target.
---
 llvm/lib/CodeGen/ExpandFp.cpp | 112 ++++++++++++++++++++--------------
 1 file changed, 66 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index fd376d54ad753..0d0925472a6a8 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -74,11 +74,63 @@ class FRemExpander {
   /// Constant 1 of type \p ExTy.
   Value *One;
 
+  /// The frem argument/return types that can be expanded by this class.
+  // TODO The expansion could work for other floating point types
+  // as well, but this would require additional testing.
+  inline static const SmallVector<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
+                                                          MVT::f64};
+
+  /// Libcalls for frem instructions of the type at the corresponding
+  /// positions of ExpandableTypes.
+  inline static const SmallVector<RTLIB::Libcall, 3> FremLibcalls{
+      RTLIB::REM_F32, RTLIB::REM_F32, RTLIB::REM_F64};
+
+  /// Return the Libcall for frem instructions of expandable type \p VT or
+  /// std::nullopt if \p VT is not expandable.
+  static std::optional<RTLIB::Libcall> getFremLibcallForType(EVT VT) {
+    auto *It = find(ExpandableTypes, VT.getSimpleVT());
+    if (It == ExpandableTypes.end())
+      return {};
+
+    return FremLibcalls[It - ExpandableTypes.begin()];
+  };
+
 public:
   static bool canExpandType(Type *Ty) {
-    // TODO The expansion should work for other floating point types
-    // as well, but this would require additional testing.
-    return Ty->isIEEELikeFPTy() && !Ty->isBFloatTy() && !Ty->isFP128Ty();
+    EVT VT = EVT::getEVT(Ty);
+    assert(VT.isSimple() && "Can expand only simple types");
+
+    return find(ExpandableTypes, VT.getSimpleVT());
+  }
+
+  /// Return true if the pass should expand a frem instruction of the
+  /// given \p Ty for the target represented by \p TLI. Expansion
+  /// should happen if the legalization for the scalar type uses a
+  /// non-existing libcall.
+  static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
+    TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
+    if (LA != TargetLowering::LegalizeAction::LibCall &&
+        LA != TargetLowering::LegalizeAction::Expand)
+      return false;
+
+    auto Libcall = getFremLibcallForType(VT);
+    bool MissingLibcall = Libcall.has_value() && !TLI.getLibcallName(*Libcall);
+    return MissingLibcall;
+  }
+
+  static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
+    // Consider scalar type for simplicity.
+    // It is very unlikely that a vector type can be legalized without a libcall
+    // if the scalar type cannot.
+    return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));
+  }
+
+  /// Return true if the pass should expand "frem" instructions of some any for
+  /// the target represented by \p TLI.
+  static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
+    return std::any_of(
+        ExpandableTypes.begin(), ExpandableTypes.end(),
+        [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
   }
 
   static FRemExpander create(IRBuilder<> &B, Type *Ty) {
@@ -959,45 +1011,6 @@ static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
   I->eraseFromParent();
 }
 
-// This covers all floating point types; more than we need here.
-// TODO Move somewhere else for general use?
-/// Return the Libcall for a frem instruction of
-/// type \p Ty.
-static RTLIB::Libcall fremToLibcall(Type *Ty) {
-  assert(Ty->isFloatingPointTy());
-  if (Ty->isFloatTy() || Ty->is16bitFPTy())
-    return RTLIB::REM_F32;
-  if (Ty->isDoubleTy())
-    return RTLIB::REM_F64;
-  if (Ty->isFP128Ty())
-    return RTLIB::REM_F128;
-  if (Ty->isX86_FP80Ty())
-    return RTLIB::REM_F80;
-  if (Ty->isPPC_FP128Ty())
-    return RTLIB::REM_PPCF128;
-
-  llvm_unreachable("Unknown floating point type");
-}
-
-/// Return true if the pass should expand a "frem" instruction of the
-/// given \p Ty for the target represented by \p TLI. Expansion
-/// should happen if the legalization for the scalar type uses a
-/// non-existing libcall. The scalar type is considered because it is
-/// easier to do so and it is highly unlikely that a vector type can
-/// be legalized without a libcall if the scalar type cannot.
-static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
-  Type *ScalarTy = Ty->getScalarType();
-  EVT VT = EVT::getEVT(ScalarTy);
-
-  TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
-  if (LA != TargetLowering::LegalizeAction::LibCall &&
-      LA != TargetLowering::LegalizeAction::Expand)
-    return false;
-
-  bool MissingLibcall = !TLI.getLibcallName(fremToLibcall(ScalarTy));
-  return MissingLibcall && FRemExpander::canExpandType(ScalarTy);
-}
-
 static bool runImpl(Function &F, const TargetLowering &TLI,
                     AssumptionCache *AC) {
   SmallVector<Instruction *, 4> Replace;
@@ -1009,18 +1022,25 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
   if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
     MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
 
-  bool TargetSkipExpandLargeFp =
+  bool DisableExpandLargeFp =
       MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
+  bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
+
+  if (DisableExpandLargeFp && DisableFrem)
+    return false;
 
   for (auto &I : instructions(F)) {
     switch (I.getOpcode()) {
     case Instruction::FRem: {
+      if (DisableFrem)
+        continue;
+
       Type *Ty = I.getType();
       // TODO: This pass doesn't handle scalable vectors.
       if (Ty->isScalableTy())
         continue;
 
-      if (!shouldExpandFremType(TLI, Ty))
+      if (!FRemExpander::shouldExpandFremType(TLI, Ty))
         continue;
 
       Replace.push_back(&I);
@@ -1030,7 +1050,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::FPToUI:
     case Instruction::FPToSI: {
-      if (TargetSkipExpandLargeFp)
+      if (DisableExpandLargeFp)
         continue;
 
       // TODO: This pass doesn't handle scalable vectors.
@@ -1050,7 +1070,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::UIToFP:
     case Instruction::SIToFP: {
-      if (TargetSkipExpandLargeFp)
+      if (DisableExpandLargeFp)
         continue;
 
       // TODO: This pass doesn't handle scalable vectors.