[llvm] AMDGPU: Don't fold rootn(x, 1) to input for strictfp functions (PR #92595)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 09:39:50 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/92595
>From 92b6815c53c0f9296da896d70b71be17541aa182 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 2 Dec 2023 13:22:39 +0900
Subject: [PATCH] AMDGPU: Don't fold rootn(x, 1) to input for strictfp functions
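Folding rootn(x, 1) directly to x should not be done in a strictfp function;
there we would need to insert a constrained canonicalize instead. That is not
implemented yet, so skip the fold in strictfp functions for now.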
---
llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 11 ++++++++---
.../CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll | 6 ++++--
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 0a5fbf5034c01..47de1791dae31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1163,14 +1163,19 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
if (!match(opr1, m_APIntAllowPoison(CINT)))
return false;
+ Function *Parent = B.GetInsertBlock()->getParent();
+
int ci_opr1 = (int)CINT->getSExtValue();
- if (ci_opr1 == 1) { // rootn(x, 1) = x
- LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
+ if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
+ // rootn(x, 1) = x
+ //
+ // TODO: Insert constrained canonicalize for strictfp case.
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
replaceCall(FPOp, opr0);
return true;
}
- Module *M = B.GetInsertBlock()->getModule();
+ Module *M = Parent->getParent();
if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
if (FunctionCallee FPExpr =
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
index f79983e2491a4..d75517cb26875 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
@@ -511,7 +511,8 @@ define float @test_rootn_f32__y_1__strictfp(float %x) #1 {
; CHECK-LABEL: define float @test_rootn_f32__y_1__strictfp(
; CHECK-SAME: float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: ret float [[X]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 1) #[[ATTR0]]
+; CHECK-NEXT: ret float [[CALL]]
;
entry:
%call = tail call float @_Z5rootnfi(float %x, i32 1) #1
@@ -533,7 +534,8 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 {
; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp(
; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: ret <2 x float> [[X]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 1>) #[[ATTR0]]
+; CHECK-NEXT: ret <2 x float> [[CALL]]
;
entry:
%call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>) #1
More information about the llvm-commits mailing list