[llvm] 627463d - [X86][FP16] Limit combination of fp_round & concat to concat of 2 operands (#94302)

Tue Jun 4 19:08:31 PDT 2024

Author: Feng Zou
Date: 2024-06-05T10:08:28+08:00
New Revision: 627463dc39b569527243ce67902048c59521d5e0

URL: https://github.com/llvm/llvm-project/commit/627463dc39b569527243ce67902048c59521d5e0
DIFF: https://github.com/llvm/llvm-project/commit/627463dc39b569527243ce67902048c59521d5e0.diff

LOG: [X86][FP16] Limit combination of fp_round & concat to concat of 2 operands (#94302)

Add check of number of operands for concat_vectors being equal to 2.
This can
avoid crash if there are more than 2 operands for concat_vectors and
operand 0 & 1 are undef value.

Added: 
    llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0e377dd53b742..7d30de15f84d2 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57181,9 +57181,12 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
   SDValue Cvt, Chain;
   unsigned NumElts = VT.getVectorNumElements();
   if (Subtarget.hasFP16()) {
-    // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64), ..)))
-    // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64), ..))
-    if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS) {
+    // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64),
+    //                                        v4f32 (xint_to_fp v4i64))))
+    // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64),
+    //                            v8f16 (CVTXI2P v4i64)))
+    if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS &&
+        Src.getNumOperands() == 2) {
       SDValue Cvt0, Cvt1;
       SDValue Op0 = Src.getOperand(0);
       SDValue Op1 = Src.getOperand(1);

diff  --git a/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
new file mode 100644
index 0000000000000..1c4b1cc55e4c3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512fp16 | FileCheck %s
+
+define void @foo(<2 x float> %0) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT:    vmovlps %xmm0, 0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %1 = shufflevector <2 x float> zeroinitializer, <2 x float> %0, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
+  %2 = fptrunc <8 x float> %1 to <8 x half>
+  %3 = bitcast <8 x half> %2 to <2 x i64>
+  %4 = extractelement <2 x i64> %3, i64 0
+  store i64 %4, ptr null, align 8
+  ret void
+}