[llvm] AMDGPU: Add more tests for fmed3 instcombine folds (PR #139529)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 12 03:28:57 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Add tests with sNaN literals, and run the tests both with and without amdgpu-ieee.
---
Full diff: https://github.com/llvm/llvm-project/pull/139529.diff
2 Files Affected:
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll (+32-3)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll (+257-2)
``````````diff
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll
index a31b47b2ca6e7..66011ad1ac76f 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3-fpext-fold.ll
@@ -605,9 +605,38 @@ define float @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1
ret float %med3
}
+define float @fmed3_f32_fpext_f16_strictfp(half %arg0, half %arg1, half %arg2) #2 {
+; UNKNOWN-LABEL: define float @fmed3_f32_fpext_f16_strictfp
+; UNKNOWN-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
+; UNKNOWN-NEXT: [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
+; UNKNOWN-NEXT: [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
+; UNKNOWN-NEXT: [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
+; UNKNOWN-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR2]]
+; UNKNOWN-NEXT: ret float [[MED3]]
+;
+; GFX8-LABEL: define float @fmed3_f32_fpext_f16_strictfp
+; GFX8-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
+; GFX8-NEXT: [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
+; GFX8-NEXT: [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
+; GFX8-NEXT: [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
+; GFX8-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR4:[0-9]+]]
+; GFX8-NEXT: ret float [[MED3]]
+;
+; GFX9-LABEL: define float @fmed3_f32_fpext_f16_strictfp
+; GFX9-SAME: (half [[ARG0:%.*]], half [[ARG1:%.*]], half [[ARG2:%.*]]) #[[ATTR2:[0-9]+]] {
+; GFX9-NEXT: [[ARG0_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG0]], metadata !"fpexcept.strict")
+; GFX9-NEXT: [[ARG1_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG1]], metadata !"fpexcept.strict")
+; GFX9-NEXT: [[ARG2_EXT:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half [[ARG2]], metadata !"fpexcept.strict")
+; GFX9-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[ARG0_EXT]], float [[ARG1_EXT]], float [[ARG2_EXT]]) #[[ATTR5:[0-9]+]]
+; GFX9-NEXT: ret float [[MED3]]
+;
+ %arg0.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg0, metadata !"fpexcept.strict")
+ %arg1.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg1, metadata !"fpexcept.strict")
+ %arg2.ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg2, metadata !"fpexcept.strict")
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext) #2
+ ret float %med3
+}
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX8: {{.*}}
-; UNKNOWN: {{.*}}
+attributes #2 = { strictfp }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index 311846b391e2b..5274ac1093a26 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -1,5 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s
+
+; Test with "amdgpu-ieee" set to true and false
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine -mcpu=gfx600 < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
+; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
+
+; Test with gfx12 since there is no ieee bit anymore.
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine < %s | FileCheck -check-prefixes=CHECK,IEEE1 %s
+; RUN: sed 's/\"true\"/\"false\"/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=instcombine | FileCheck -check-prefixes=CHECK,IEEE0 %s
+
; --------------------------------------------------------------------
; llvm.amdgcn.fmed3
; --------------------------------------------------------------------
@@ -328,5 +336,252 @@ define float @fmed3_x_y_poison_f32(float %x, float %y) #1 {
ret float %med3
}
+define float @fmed3_snan1_x_y_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_x_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
+ ret float %med3
+}
+
+define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_x_snan1_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
+ ret float %med3
+}
+
+define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
+ ret float %med3
+}
+
+define float @fmed3_snan1_x_snan2_f32(float %x) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_x_snan2_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: ret float [[X]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000020000000, float %x, float 0x7FF0000040000000)
+ ret float %med3
+}
+
+define float @fmed3_x_snan1_snan2_f32(float %x) #1 {
+; CHECK-LABEL: define float @fmed3_x_snan1_snan2_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: ret float [[X]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000020000000, float 0x7FF0000040000000)
+ ret float %med3
+}
+
+define float @fmed3_snan1_snan2_snan3_f32(float %x) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_snan2_snan3_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: ret float 0x7FF8000020000000
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float 0x7FF0000020000000, float 0x7FF0000040000000)
+ ret float %med3
+}
+
+define float @fmed3_snan1_1_2_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_1_2_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float 1.0, float 2.0)
+ ret float %med3
+}
+
+define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_snan1_neg1_2_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: ret float -1.000000e+00
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0x7FF4000000000000, float 2.0)
+ ret float %med3
+}
+
+define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4000000000000)
+ ret float %med3
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3 with default mode implied by shader CC
+; --------------------------------------------------------------------
+
+define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32(float %x, float %y) {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_snan1_x_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
+ ret float %med3
+}
+
+define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y) {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
+ ret float %med3
+}
+
+define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
+ ret float %med3
+}
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3 with shader CC and an explicit amdgpu-ieee attribute
+; --------------------------------------------------------------------
+
+define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_snan1_x_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y)
+ ret float %med3
+}
+
+define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y)
+ ret float %med3
+}
+
+define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) #1 {
+; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
+ ret float %med3
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3 with strictfp calls
+; --------------------------------------------------------------------
+
+define float @fmed3_qnan0_x_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_qnan0_x_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) strictfp
+ ret float %med3
+}
+
+define float @fmed3_x_qnan0_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_qnan0_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y) strictfp
+ ret float %med3
+}
+
+define float @fmed3_x_y_qnan0_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) strictfp
+ ret float %med3
+}
+
+define float @fmed3_snan1_x_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_snan1_x_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF4000000000000, float %x, float %y) strictfp
+ ret float %med3
+}
+
+define float @fmed3_x_snan1_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_snan1_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF4000000000000, float %y) strictfp
+ ret float %med3
+}
+
+define float @fmed3_x_y_snan1_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_y_snan1_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
+; CHECK-NEXT: ret float [[MED3]]
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000) strictfp
+ ret float %med3
+}
+
+define float @fmed3_poison_x_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_poison_x_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: ret float poison
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float poison, float %x, float %y) strictfp
+ ret float %med3
+}
+
+define float @fmed3_x_poison_y_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_poison_y_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: ret float poison
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float poison, float %y) strictfp
+ ret float %med3
+}
+
+define float @fmed3_x_y_poison_f32_strictfp(float %x, float %y) #2 {
+; CHECK-LABEL: define float @fmed3_x_y_poison_f32_strictfp(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: ret float poison
+;
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float poison) strictfp
+ ret float %med3
+}
+
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "amdgpu-ieee"="true" }
+attributes #2 = { nounwind strictfp "amdgpu-ieee"="true" }
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; IEEE0: {{.*}}
+; IEEE1: {{.*}}
``````````
</details>
https://github.com/llvm/llvm-project/pull/139529
More information about the llvm-commits mailing list