[llvm] [X86][FP16] Limit combination of fp_round & concat to concat of 2 operands (PR #94302)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 18:33:23 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Feng Zou (fzou1)
<details>
<summary>Changes</summary>
Add a check that concat_vectors has exactly 2 operands before combining it
with fp_round. This avoids a crash when concat_vectors has more than 2
operands and some of them are undef values.
---
Full diff: https://github.com/llvm/llvm-project/pull/94302.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+6-3)
- (added) llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll (+22)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0e377dd53b742..7d30de15f84d2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57181,9 +57181,12 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
SDValue Cvt, Chain;
unsigned NumElts = VT.getVectorNumElements();
if (Subtarget.hasFP16()) {
- // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64), ..)))
- // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64), ..))
- if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64),
+ // v4f32 (xint_to_fp v4i64))))
+ // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64),
+ // v8f16 (CVTXI2P v4i64)))
+ if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS &&
+ Src.getNumOperands() == 2) {
SDValue Cvt0, Cvt1;
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
new file mode 100644
index 0000000000000..1c4b1cc55e4c3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512fp16 | FileCheck %s
+
+define void @foo(<2 x float> %0) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vmovlps %xmm0, 0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %1 = shufflevector <2 x float> zeroinitializer, <2 x float> %0, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
+ %2 = fptrunc <8 x float> %1 to <8 x half>
+ %3 = bitcast <8 x half> %2 to <2 x i64>
+ %4 = extractelement <2 x i64> %3, i64 0
+ store i64 %4, ptr null, align 8
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/94302
More information about the llvm-commits mailing list