[llvm] [X86][FP16] Limit combination of fp_round & concat to concat of 2 operands (PR #94302)

Mon Jun 3 18:33:23 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Feng Zou (fzou1)

<details>
<summary>Changes</summary>

Add a check that concat_vectors has exactly 2 operands before applying this
combine. This avoids a crash when concat_vectors has more than 2 operands and
some of them are undef values.

---
Full diff: https://github.com/llvm/llvm-project/pull/94302.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+6-3) 
- (added) llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll (+22) 


``````````diff

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0e377dd53b742..7d30de15f84d2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57181,9 +57181,12 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
   SDValue Cvt, Chain;
   unsigned NumElts = VT.getVectorNumElements();
   if (Subtarget.hasFP16()) {
-    // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64), ..)))
-    // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64), ..))
-    if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS) {
+    // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64),
+    //                                        v4f32 (xint_to_fp v4i64))))
+    // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64),
+    //                            v8f16 (CVTXI2P v4i64)))
+    if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS &&
+        Src.getNumOperands() == 2) {
       SDValue Cvt0, Cvt1;
       SDValue Op0 = Src.getOperand(0);
       SDValue Op1 = Src.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
new file mode 100644
index 0000000000000..1c4b1cc55e4c3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512fp16 | FileCheck %s
+
+define void @foo(<2 x float> %0) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT:    vmovlps %xmm0, 0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %1 = shufflevector <2 x float> zeroinitializer, <2 x float> %0, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
+  %2 = fptrunc <8 x float> %1 to <8 x half>
+  %3 = bitcast <8 x half> %2 to <2 x i64>
+  %4 = extractelement <2 x i64> %3, i64 0
+  store i64 %4, ptr null, align 8
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/94302