[llvm] [X86][FP16] Limit combination of fp_round & concat to concat of 2 operands (PR #94302)

Mon Jun 3 18:32:50 PDT 2024

https://github.com/fzou1 created https://github.com/llvm/llvm-project/pull/94302

Add a check that concat_vectors has exactly 2 operands before combining
it with fp_round. This avoids a crash when concat_vectors has more than
2 operands and some of them are undef.

>From fa505c98bfba302a7c511226b13de598fe0e376d Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Tue, 4 Jun 2024 08:21:13 +0800
Subject: [PATCH 1/2] [X86][FP16] Limit combination of fp_round & concat to
 concat of 2 operands

Add a check that concat_vectors has exactly 2 operands before combining
it with fp_round. This avoids a crash when concat_vectors has more than
2 operands and some of them are undef.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  8 ++++---
 .../fp-round-with-concat-vector-undef-elem.ll | 22 +++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0e377dd53b742..25c8502c0744e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57181,9 +57181,11 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
   SDValue Cvt, Chain;
   unsigned NumElts = VT.getVectorNumElements();
   if (Subtarget.hasFP16()) {
-    // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64), ..)))
-    // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64), ..))
-    if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS) {
+    // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64),
+    //                                        v4f32 (xint_to_fp v4i64))))
+    // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64),
+    //                            v8f16 (CVTXI2P v4i64)))
+    if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS && Src.getNumOperands() == 2) {
       SDValue Cvt0, Cvt1;
       SDValue Op0 = Src.getOperand(0);
       SDValue Op1 = Src.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
new file mode 100644
index 0000000000000..1c4b1cc55e4c3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512fp16 | FileCheck %s
+
+define void @foo(<2 x float> %0) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT:    vmovlps %xmm0, 0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %1 = shufflevector <2 x float> zeroinitializer, <2 x float> %0, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
+  %2 = fptrunc <8 x float> %1 to <8 x half>
+  %3 = bitcast <8 x half> %2 to <2 x i64>
+  %4 = extractelement <2 x i64> %3, i64 0
+  store i64 %4, ptr null, align 8
+  ret void
+}

>From e46866e9adda77ba8552ac03a5f84ce06dc2e1b8 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Tue, 4 Jun 2024 09:30:18 +0800
Subject: [PATCH 2/2] Apply clang-format

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 25c8502c0744e..7d30de15f84d2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57185,7 +57185,8 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
     //                                        v4f32 (xint_to_fp v4i64))))
     // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64),
     //                            v8f16 (CVTXI2P v4i64)))
-    if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS && Src.getNumOperands() == 2) {
+    if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS &&
+        Src.getNumOperands() == 2) {
       SDValue Cvt0, Cvt1;
       SDValue Op0 = Src.getOperand(0);
       SDValue Op1 = Src.getOperand(1);