[llvm] [X86][FP16] Do not generate X86 FMIN/FMAX for FP16 when VLX not enabled, part 2 (PR #143483)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 00:17:12 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
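Fixes the issue reproduced at https://godbolt.org/z/eYTxeqE48: `combineSelect` no longer forms X86 FMIN/FMAX nodes for `v8f16`/`v16f16` selects unless the subtarget has VLX.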
---
Full diff: https://github.com/llvm/llvm-project/pull/143483.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+1)
- (renamed) llvm/test/CodeGen/X86/avx512fp16-novl.ll (+84)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 982583c4b3650..b34215b316128 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47804,6 +47804,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
Cond.getOpcode() == ISD::STRICT_FSETCCS) &&
VT.isFloatingPoint() && VT != MVT::f80 && VT != MVT::f128 &&
!isSoftF16(VT, Subtarget) && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
+ ((VT != MVT::v8f16 && VT != MVT::v16f16) || Subtarget.hasVLX()) &&
(Subtarget.hasSSE2() ||
(Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
bool IsStrict = Cond->isStrictFPOpcode();
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll b/llvm/test/CodeGen/X86/avx512fp16-novl.ll
similarity index 62%
rename from llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll
rename to llvm/test/CodeGen/X86/avx512fp16-novl.ll
index 26947b5eb3022..c64a59432abd2 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-novl.ll
@@ -123,3 +123,87 @@ define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) {
%fp16 = uitofp <8 x i16> %int16 to <8 x half>
ret <8 x half> %fp16
}
+
+define <8 x half> @select(<8 x half> %x) {
+; CHECK-LABEL: select:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: vucomish %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: vpsrld $16, %xmm0, %xmm2
+; CHECK-NEXT: vucomish %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $14, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-5, %ax
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-NEXT: vucomish %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $13, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-9, %ax
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm2
+; CHECK-NEXT: vucomish %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $12, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-17, %ax
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; CHECK-NEXT: vucomish %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $11, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-33, %ax
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vucomish %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kshiftlw $15, %k1, %k1
+; CHECK-NEXT: kshiftrw $10, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: movw $-65, %ax
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
+; CHECK-NEXT: vucomish %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kshiftlw $6, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kshiftlw $9, %k0, %k0
+; CHECK-NEXT: kshiftrw $9, %k0, %k0
+; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vucomish %xmm1, %xmm2
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: kshiftlw $7, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k1
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %c = fcmp ogt <8 x half> %x, splat (half 0xH3C00)
+ %s = select <8 x i1> %c, <8 x half> splat (half 0xH3C00), <8 x half> %x
+ ret <8 x half> %s
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/143483
More information about the llvm-commits mailing list.