[llvm] [NVPTX] remove incorrect NVPTX intrinsic transformations (PR #76870)

Mon Jan 8 11:13:22 PST 2024

https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/76870

>From c11ad36196895957b6c13e672be4bdaf8f2d5317 Mon Sep 17 00:00:00 2001
From: Alexander MacLean <amaclean at ipp2-2083.nvidia.com>
Date: Wed, 3 Jan 2024 02:08:07 +0000
Subject: [PATCH 1/2] [NVPTX] remove incorrect NVPTX intrinsic transformations

---
 .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 34 -------------
 .../InstCombine/NVPTX/nvvm-intrins.ll         | 48 +++++++------------
 2 files changed, 17 insertions(+), 65 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index c73721da46e359..7aa63f9fc0c966 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -180,10 +180,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
       return {Intrinsic::ceil, FTZ_MustBeOn};
     case Intrinsic::nvvm_fabs_d:
       return {Intrinsic::fabs, FTZ_Any};
-    case Intrinsic::nvvm_fabs_f:
-      return {Intrinsic::fabs, FTZ_MustBeOff};
-    case Intrinsic::nvvm_fabs_ftz_f:
-      return {Intrinsic::fabs, FTZ_MustBeOn};
     case Intrinsic::nvvm_floor_d:
       return {Intrinsic::floor, FTZ_Any};
     case Intrinsic::nvvm_floor_f:
@@ -264,12 +260,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
       return {Intrinsic::minimum, FTZ_MustBeOff, true};
     case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
       return {Intrinsic::minimum, FTZ_MustBeOn, true};
-    case Intrinsic::nvvm_round_d:
-      return {Intrinsic::round, FTZ_Any};
-    case Intrinsic::nvvm_round_f:
-      return {Intrinsic::round, FTZ_MustBeOff};
-    case Intrinsic::nvvm_round_ftz_f:
-      return {Intrinsic::round, FTZ_MustBeOn};
     case Intrinsic::nvvm_sqrt_rn_d:
       return {Intrinsic::sqrt, FTZ_Any};
     case Intrinsic::nvvm_sqrt_f:
@@ -278,10 +268,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
       // the ftz-ness of the surrounding code.  sqrt_rn_f and sqrt_rn_ftz_f are
       // the versions with explicit ftz-ness.
       return {Intrinsic::sqrt, FTZ_Any};
-    case Intrinsic::nvvm_sqrt_rn_f:
-      return {Intrinsic::sqrt, FTZ_MustBeOff};
-    case Intrinsic::nvvm_sqrt_rn_ftz_f:
-      return {Intrinsic::sqrt, FTZ_MustBeOn};
     case Intrinsic::nvvm_trunc_d:
       return {Intrinsic::trunc, FTZ_Any};
     case Intrinsic::nvvm_trunc_f:
@@ -316,24 +302,8 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
       return {Instruction::UIToFP};
 
     // NVVM intrinsics that map to LLVM binary ops.
-    case Intrinsic::nvvm_add_rn_d:
-      return {Instruction::FAdd, FTZ_Any};
-    case Intrinsic::nvvm_add_rn_f:
-      return {Instruction::FAdd, FTZ_MustBeOff};
-    case Intrinsic::nvvm_add_rn_ftz_f:
-      return {Instruction::FAdd, FTZ_MustBeOn};
-    case Intrinsic::nvvm_mul_rn_d:
-      return {Instruction::FMul, FTZ_Any};
-    case Intrinsic::nvvm_mul_rn_f:
-      return {Instruction::FMul, FTZ_MustBeOff};
-    case Intrinsic::nvvm_mul_rn_ftz_f:
-      return {Instruction::FMul, FTZ_MustBeOn};
     case Intrinsic::nvvm_div_rn_d:
       return {Instruction::FDiv, FTZ_Any};
-    case Intrinsic::nvvm_div_rn_f:
-      return {Instruction::FDiv, FTZ_MustBeOff};
-    case Intrinsic::nvvm_div_rn_ftz_f:
-      return {Instruction::FDiv, FTZ_MustBeOn};
 
     // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
     // need special handling.
@@ -342,10 +312,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
     // as well.
     case Intrinsic::nvvm_rcp_rn_d:
       return {SPC_Reciprocal, FTZ_Any};
-    case Intrinsic::nvvm_rcp_rn_f:
-      return {SPC_Reciprocal, FTZ_MustBeOff};
-    case Intrinsic::nvvm_rcp_rn_ftz_f:
-      return {SPC_Reciprocal, FTZ_MustBeOn};
 
       // We do not currently simplify intrinsics that give an approximate
       // answer. These include:
diff --git a/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll b/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
index ca1a5237f905d5..2f1e8f336d0574 100644
--- a/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
+++ b/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
@@ -49,15 +49,13 @@ define double @fabs_double(double %a) #0 {
 }
 ; CHECK-LABEL: @fabs_float
 define float @fabs_float(float %a) #0 {
-; NOFTZ: call float @llvm.fabs.f32
-; FTZ: call float @llvm.nvvm.fabs.f
+; CHECK: call float @llvm.nvvm.fabs.f
   %ret = call float @llvm.nvvm.fabs.f(float %a)
   ret float %ret
 }
 ; CHECK-LABEL: @fabs_float_ftz
 define float @fabs_float_ftz(float %a) #0 {
-; NOFTZ: call float @llvm.nvvm.fabs.ftz.f
-; FTZ: call float @llvm.fabs.f32
+; CHECK: call float @llvm.nvvm.fabs.ftz.f
   %ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
   ret float %ret
 }
@@ -148,21 +146,19 @@ define float @fmin_float_ftz(float %a, float %b) #0 {
 
 ; CHECK-LABEL: @round_double
 define double @round_double(double %a) #0 {
-; CHECK: call double @llvm.round.f64
+; CHECK: call double @llvm.nvvm.round.d
   %ret = call double @llvm.nvvm.round.d(double %a)
   ret double %ret
 }
 ; CHECK-LABEL: @round_float
 define float @round_float(float %a) #0 {
-; NOFTZ: call float @llvm.round.f32
-; FTZ: call float @llvm.nvvm.round.f
+; CHECK: call float @llvm.nvvm.round.f
   %ret = call float @llvm.nvvm.round.f(float %a)
   ret float %ret
 }
 ; CHECK-LABEL: @round_float_ftz
 define float @round_float_ftz(float %a) #0 {
-; NOFTZ: call float @llvm.nvvm.round.ftz.f
-; FTZ: call float @llvm.round.f32
+; CHECK: call float @llvm.nvvm.round.ftz.f
   %ret = call float @llvm.nvvm.round.ftz.f(float %a)
   ret float %ret
 }
@@ -292,42 +288,38 @@ define float @test_ull2f(i64 %a) #0 {
 
 ; CHECK-LABEL: @test_add_rn_d
 define double @test_add_rn_d(double %a, double %b) #0 {
-; CHECK: fadd
+; CHECK: call double @llvm.nvvm.add.rn.d
   %ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
   ret double %ret
 }
 ; CHECK-LABEL: @test_add_rn_f
 define float @test_add_rn_f(float %a, float %b) #0 {
-; NOFTZ: fadd
-; FTZ: call float @llvm.nvvm.add.rn.f
+; CHECK: call float @llvm.nvvm.add.rn.f
   %ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
   ret float %ret
 }
 ; CHECK-LABEL: @test_add_rn_f_ftz
 define float @test_add_rn_f_ftz(float %a, float %b) #0 {
-; NOFTZ: call float @llvm.nvvm.add.rn.f
-; FTZ: fadd
+; CHECK: call float @llvm.nvvm.add.rn.f
   %ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
   ret float %ret
 }
 
 ; CHECK-LABEL: @test_mul_rn_d
 define double @test_mul_rn_d(double %a, double %b) #0 {
-; CHECK: fmul
+; CHECK: call double @llvm.nvvm.mul.rn.d
   %ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
   ret double %ret
 }
 ; CHECK-LABEL: @test_mul_rn_f
 define float @test_mul_rn_f(float %a, float %b) #0 {
-; NOFTZ: fmul
-; FTZ: call float @llvm.nvvm.mul.rn.f
+; CHECK: call float @llvm.nvvm.mul.rn.f
   %ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
   ret float %ret
 }
 ; CHECK-LABEL: @test_mul_rn_f_ftz
 define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
-; NOFTZ: call float @llvm.nvvm.mul.rn.f
-; FTZ: fmul
+; CHECK: call float @llvm.nvvm.mul.rn.f
   %ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
   ret float %ret
 }
@@ -340,15 +332,13 @@ define double @test_div_rn_d(double %a, double %b) #0 {
 }
 ; CHECK-LABEL: @test_div_rn_f
 define float @test_div_rn_f(float %a, float %b) #0 {
-; NOFTZ: fdiv
-; FTZ: call float @llvm.nvvm.div.rn.f
+; CHECK: call float @llvm.nvvm.div.rn.f
   %ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
   ret float %ret
 }
 ; CHECK-LABEL: @test_div_rn_f_ftz
 define float @test_div_rn_f_ftz(float %a, float %b) #0 {
-; NOFTZ: call float @llvm.nvvm.div.rn.f
-; FTZ: fdiv
+; CHECK: call float @llvm.nvvm.div.rn.f
   %ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
   ret float %ret
 }
@@ -357,15 +347,13 @@ define float @test_div_rn_f_ftz(float %a, float %b) #0 {
 
 ; CHECK-LABEL: @test_rcp_rn_f
 define float @test_rcp_rn_f(float %a) #0 {
-; NOFTZ: fdiv float 1.0{{.*}} %a
-; FTZ: call float @llvm.nvvm.rcp.rn.f
+; CEHCK: call float @llvm.nvvm.rcp.rn.f
   %ret = call float @llvm.nvvm.rcp.rn.f(float %a)
   ret float %ret
 }
 ; CHECK-LABEL: @test_rcp_rn_f_ftz
 define float @test_rcp_rn_f_ftz(float %a) #0 {
-; NOFTZ: call float @llvm.nvvm.rcp.rn.f
-; FTZ: fdiv float 1.0{{.*}} %a
+; CHECK: call float @llvm.nvvm.rcp.rn.f
   %ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
   ret float %ret
 }
@@ -385,15 +373,13 @@ define float @test_sqrt_f(float %a) #0 {
 }
 ; CHECK-LABEL: @test_sqrt_rn_f
 define float @test_sqrt_rn_f(float %a) #0 {
-; NOFTZ: call float @llvm.sqrt.f32(float %a)
-; FTZ: call float @llvm.nvvm.sqrt.rn.f
+; CHECK: call float @llvm.nvvm.sqrt.rn.f
   %ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
   ret float %ret
 }
 ; CHECK-LABEL: @test_sqrt_rn_f_ftz
 define float @test_sqrt_rn_f_ftz(float %a) #0 {
-; NOFTZ: call float @llvm.nvvm.sqrt.rn.f
-; FTZ: call float @llvm.sqrt.f32(float %a)
+; CHECK: call float @llvm.nvvm.sqrt.rn.f
   %ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
   ret float %ret
 }

>From 46945c5aba1a87333e21dcabc9ebc070419c1809 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Mon, 8 Jan 2024 19:01:54 +0000
Subject: [PATCH 2/2] fix typo + cleanup in nvvm-intrins.ll test

---
 .../Transforms/InstCombine/NVPTX/nvvm-intrins.ll     | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll b/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
index 2f1e8f336d0574..633aa43c4fc89c 100644
--- a/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
+++ b/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
@@ -300,7 +300,7 @@ define float @test_add_rn_f(float %a, float %b) #0 {
 }
 ; CHECK-LABEL: @test_add_rn_f_ftz
 define float @test_add_rn_f_ftz(float %a, float %b) #0 {
-; CHECK: call float @llvm.nvvm.add.rn.f
+; CHECK: call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
   %ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
   ret float %ret
 }
@@ -319,7 +319,7 @@ define float @test_mul_rn_f(float %a, float %b) #0 {
 }
 ; CHECK-LABEL: @test_mul_rn_f_ftz
 define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
-; CHECK: call float @llvm.nvvm.mul.rn.f
+; CHECK: call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
   %ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
   ret float %ret
 }
@@ -338,7 +338,7 @@ define float @test_div_rn_f(float %a, float %b) #0 {
 }
 ; CHECK-LABEL: @test_div_rn_f_ftz
 define float @test_div_rn_f_ftz(float %a, float %b) #0 {
-; CHECK: call float @llvm.nvvm.div.rn.f
+; CHECK: call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
   %ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
   ret float %ret
 }
@@ -347,13 +347,13 @@ define float @test_div_rn_f_ftz(float %a, float %b) #0 {
 
 ; CHECK-LABEL: @test_rcp_rn_f
 define float @test_rcp_rn_f(float %a) #0 {
-; CEHCK: call float @llvm.nvvm.rcp.rn.f
+; CHECK: call float @llvm.nvvm.rcp.rn.f
   %ret = call float @llvm.nvvm.rcp.rn.f(float %a)
   ret float %ret
 }
 ; CHECK-LABEL: @test_rcp_rn_f_ftz
 define float @test_rcp_rn_f_ftz(float %a) #0 {
-; CHECK: call float @llvm.nvvm.rcp.rn.f
+; CHECK: call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
   %ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
   ret float %ret
 }
@@ -379,7 +379,7 @@ define float @test_sqrt_rn_f(float %a) #0 {
 }
 ; CHECK-LABEL: @test_sqrt_rn_f_ftz
 define float @test_sqrt_rn_f_ftz(float %a) #0 {
-; CHECK: call float @llvm.nvvm.sqrt.rn.f
+; CHECK: call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
   %ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
   ret float %ret
 }