[llvm] [AMDGPU] expand-fp: Change frem expansion criterion (PR #158285)

Fri Sep 12 06:22:14 PDT 2025

https://github.com/frederik-h updated https://github.com/llvm/llvm-project/pull/158285

>From 4845052677d3b2a08ff56302404317397ae19ed7 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Fri, 12 Sep 2025 06:52:05 -0400
Subject: [PATCH 1/2] [AMDGPU] expand-fp: Change frem expansion criterion

The existing condition for deciding whether or not to expand an frem
instruction in the pass is not sufficiently precise. At the moment it
ensures the correct working of the pass only in conjunction with the
existing check of the MaxLegalFpConvertBitWidth value, which happens
to exit early on exactly those targets for which the frem condition
alone is insufficient.

The correct working of the pass should not rely on this interaction.
The possibility of using the pass for handling further expansions
(e.g. merging the very similar ExpandLargeDivRem pass into it) is
also limited by this.

This patch changes the pass to expand frem for a target iff the
target's legalization action for the instruction with the scalar type
corresponding to the instruction type is LibCall but the libcall does
not exist. The legalization action for frem in the AMDGPU backend is
adjusted accordingly.
---
 llvm/lib/CodeGen/ExpandFp.cpp                 | 37 +++++++++++++------
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  2 +-
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 9cc6c6a706c58..6f4f049cc7f8e 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -979,14 +979,22 @@ static RTLIB::Libcall fremToLibcall(Type *Ty) {
   llvm_unreachable("Unknown floating point type");
 }
 
-/* Return true if, according to \p LibInfo, the target either directly
-   supports the frem instruction for the \p Ty, has a custom lowering,
-   or uses a libcall. */
-static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) {
-  if (!TLI.isOperationExpand(ISD::FREM, EVT::getEVT(Ty)))
-    return true;
-
-  return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
+/// Return true if the pass should expand a "frem" instruction of the
+/// given \p Ty for the target represented by \p TLI. Expansion
+/// should happen if the legalization for the scalar type uses a
+/// non-existing libcall. The scalar type is considered because it is
+/// easier to do so and it is highly unlikely that a vector type can
+/// be legalized without a libcall if the scalar type cannot.
+static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
+  Type *ScalarTy = Ty->getScalarType();
+  EVT VT = EVT::getEVT(ScalarTy);
+
+  TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
+  if (LA != TargetLowering::LegalizeAction::LibCall)
+    return false;
+
+  bool MissingLibcall = !TLI.getLibcallName(fremToLibcall(ScalarTy));
+  return MissingLibcall && FRemExpander::canExpandType(ScalarTy);
 }
 
 static bool runImpl(Function &F, const TargetLowering &TLI,
@@ -1000,8 +1008,8 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
   if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
     MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
 
-  if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
-    return false;
+  bool TargetSkipExpandLargeFp =
+      MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
 
   for (auto &I : instructions(F)) {
     switch (I.getOpcode()) {
@@ -1011,8 +1019,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
       if (Ty->isScalableTy())
         continue;
 
-      if (targetSupportsFrem(TLI, Ty) ||
-          !FRemExpander::canExpandType(Ty->getScalarType()))
+      if (!shouldExpandFremType(TLI, Ty))
         continue;
 
       Replace.push_back(&I);
@@ -1022,6 +1029,9 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::FPToUI:
     case Instruction::FPToSI: {
+      if (TargetSkipExpandLargeFp)
+        continue;
+
       // TODO: This pass doesn't handle scalable vectors.
       if (I.getOperand(0)->getType()->isScalableTy())
         continue;
@@ -1039,6 +1049,9 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
     case Instruction::UIToFP:
     case Instruction::SIToFP: {
+      if (TargetSkipExpandLargeFp)
+        continue;
+
       // TODO: This pass doesn't handle scalable vectors.
       if (I.getOperand(0)->getType()->isScalableTy())
         continue;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5c9b616e9bc21..3892d7949a0fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -423,7 +423,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
                      Expand);
 
-  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand);
+  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, LibCall);
 
   if (Subtarget->has16BitInsts()) {
     setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);

>From 534b3e2f0787cfa2c13e0d8cf6356db12db27741 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <fharwath at amd.com>
Date: Fri, 12 Sep 2025 09:21:27 -0400
Subject: [PATCH 2/2] Revert Operation Action for frem to Expand

---
 llvm/lib/CodeGen/ExpandFp.cpp                 | 3 ++-
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 6f4f049cc7f8e..fd376d54ad753 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -990,7 +990,8 @@ static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
   EVT VT = EVT::getEVT(ScalarTy);
 
   TargetLowering::LegalizeAction LA = TLI.getOperationAction(ISD::FREM, VT);
-  if (LA != TargetLowering::LegalizeAction::LibCall)
+  if (LA != TargetLowering::LegalizeAction::LibCall &&
+      LA != TargetLowering::LegalizeAction::Expand)
     return false;
 
   bool MissingLibcall = !TLI.getLibcallName(fremToLibcall(ScalarTy));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3892d7949a0fc..5c9b616e9bc21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -423,7 +423,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
                      Expand);
 
-  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, LibCall);
+  setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand);
 
   if (Subtarget->has16BitInsts()) {
     setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);