[llvm] [SelectionDAGBuilder][X86] Don't form FMAXNUM for f16 vectors if FMAXNUM needs to be promoted. (PR #114943)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 09:51:18 PST 2024
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/114943
>From b7e50d28db88f7c94490a6541a921062e2c61b85 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 4 Nov 2024 21:31:58 -0800
Subject: [PATCH 1/3] [SelectionDAGBuilder][X86] Don't form FMAXNUM for f16
vectors if FMAXNUM needs to be promoted.
In #70357, I changed an isLegalOrCustom to isLegalOrCustomOrPromote
in visitSelect to enable integer min/max to be formed when the operation
was promoted. Unfortunately, this also affected floating point.
For floating point, fmaxnum may require a libcall so we also need
to check if the operation on the promoted type is legal or custom.
Other changes to RISC-V have since made the original change untested
so this patch restores the original isLegalOrCustom.
---
.../SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
llvm/test/CodeGen/X86/pr114520.ll | 19 +++++++++++++++++++
2 files changed, 20 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/X86/pr114520.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 199d02afb97e3a..f8122b77b46def 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3775,7 +3775,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
}
if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
- (TLI.isOperationLegalOrCustomOrPromote(Opc, VT) ||
+ (TLI.isOperationLegalOrCustom(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
// If the underlying comparison instruction is used by any other
diff --git a/llvm/test/CodeGen/X86/pr114520.ll b/llvm/test/CodeGen/X86/pr114520.ll
new file mode 100644
index 00000000000000..f9dd3fce8c1ec0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr114520.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-none-unknown-elf -mattr=+avx512vl | FileCheck %s
+
+define half @r_2_10001(half %0) {
+; CHECK-LABEL: r_2_10001:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vpextrw $0, %xmm0, %eax
+; CHECK-NEXT: vmovd %eax, %xmm0
+; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: movl $64512, %ecx # imm = 0xFC00
+; CHECK-NEXT: cmoval %eax, %ecx
+; CHECK-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %cmp2 = fcmp ogt half %0, 0xHFC00
+ %cond.v = select i1 %cmp2, half %0, half 0xHFC00
+ ret half %cond.v
+}
>From 63421dee80680ce1e02f044bda280631f1e6dde2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 5 Nov 2024 09:37:47 -0800
Subject: [PATCH 2/3] fixup! name value
---
llvm/test/CodeGen/X86/pr114520.ll | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/X86/pr114520.ll b/llvm/test/CodeGen/X86/pr114520.ll
index f9dd3fce8c1ec0..6bf52963514957 100644
--- a/llvm/test/CodeGen/X86/pr114520.ll
+++ b/llvm/test/CodeGen/X86/pr114520.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-none-unknown-elf -mattr=+avx512vl | FileCheck %s
-define half @r_2_10001(half %0) {
+define half @r_2_10001(half %x) {
; CHECK-LABEL: r_2_10001:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpextrw $0, %xmm0, %eax
@@ -13,7 +13,7 @@ define half @r_2_10001(half %0) {
; CHECK-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
- %cmp2 = fcmp ogt half %0, 0xHFC00
- %cond.v = select i1 %cmp2, half %0, half 0xHFC00
+ %cmp2 = fcmp ogt half %x, 0xHFC00
+ %cond.v = select i1 %cmp2, half %x, half 0xHFC00
ret half %cond.v
}
>From b82b2f64791bb5f26fcb8f2fc8f89ae70557b780 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 5 Nov 2024 09:50:52 -0800
Subject: [PATCH 3/3] fixup! Add vector test.
---
llvm/test/CodeGen/X86/pr114520.ll | 97 ++++++++++++++++++++++++++++++-
1 file changed, 95 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/X86/pr114520.ll b/llvm/test/CodeGen/X86/pr114520.ll
index 6bf52963514957..660b169e302d83 100644
--- a/llvm/test/CodeGen/X86/pr114520.ll
+++ b/llvm/test/CodeGen/X86/pr114520.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-none-unknown-elf -mattr=+avx512vl | FileCheck %s
-define half @r_2_10001(half %x) {
-; CHECK-LABEL: r_2_10001:
+define half @test1(half %x) {
+; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpextrw $0, %xmm0, %eax
; CHECK-NEXT: vmovd %eax, %xmm0
@@ -17,3 +17,96 @@ entry:
%cond.v = select i1 %cmp2, half %x, half 0xHFC00
ret half %cond.v
}
+
+define <8 x half> @test2(<8 x half> %x) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2ps %xmm0, %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[1,1,1,1,4,5,6,7]
+; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-5, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $13, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-9, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[3,3,3,3,4,5,6,7]
+; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $12, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-17, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $11, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-33, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $10, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-65, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
+; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kshiftlw $6, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $9, %k0, %k0
+; CHECK-NEXT: kshiftrw $9, %k0, %k0
+; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2
+; CHECK-NEXT: vucomiss %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kshiftlw $7, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k1
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf]
+; CHECK-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovdw %ymm2, %xmm2
+; CHECK-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1))
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %cmp2 = fcmp ogt <8 x half> %x, splat (half 0xHFC00)
+ %cond.v = select <8 x i1> %cmp2, <8 x half> %x, <8 x half> splat (half 0xHFC00)
+ ret <8 x half> %cond.v
+}
More information about the llvm-commits
mailing list