[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Codegen for concat_vectors with LASX (PR #107948)

Mon Sep 9 18:39:44 PDT 2024

https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/107948

Backport 1ca411ca451e0e86caf9207779616f32ed9fd908

Requested by: @wangleiat

From 9cd12fa1c545bea1c61bb1d85b451b5f391aac4e Mon Sep 17 00:00:00 2001
From: wanglei <wanglei at loongson.cn>
Date: Tue, 10 Sep 2024 09:28:15 +0800
Subject: [PATCH] [LoongArch] Codegen for concat_vectors with LASX

Fixes: #107355

Reviewed By: SixWeining

Pull Request: https://github.com/llvm/llvm-project/pull/107523

(cherry picked from commit 1ca411ca451e0e86caf9207779616f32ed9fd908)
---
 .../LoongArch/LoongArchISelLowering.cpp       |  1 +
 .../LoongArch/LoongArchLASXInstrInfo.td       |  6 ++++
 .../CodeGen/LoongArch/lasx/issue107355.ll     | 35 +++++++++++++++++++
 3 files changed, 42 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/issue107355.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d80509cf39849e..93edafaff553ba 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -295,6 +295,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
 
       setOperationAction(ISD::SETCC, VT, Legal);
       setOperationAction(ISD::VSELECT, VT, Legal);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 6f1969bf8cae05..0a220a0319bc3b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1789,6 +1789,12 @@ def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))),
                                                      v4f64:$vj)),
                           sub_128)>;
 
+// XVPERMI_Q
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
+def : Pat<(vt (concat_vectors LSX128:$vd, LSX128:$vj)),
+          (XVPERMI_Q (SUBREG_TO_REG (i64 0), LSX128:$vd, sub_128),
+                     (SUBREG_TO_REG (i64 0), LSX128:$vj, sub_128), 2)>;
+
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll
new file mode 100644
index 00000000000000..818bd4311615d3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; Without this patch(codegen for concat_vectors), the test will hang.
+@g_156 = external global [12 x i32]
+@g_490 = external global i32
+@g_813 = external global i32
+
+define void @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pcalau12i $a0, %got_pc_hi20(g_156)
+; CHECK-NEXT:    ld.d $a0, $a0, %got_pc_lo12(g_156)
+; CHECK-NEXT:    pcalau12i $a1, %got_pc_hi20(g_490)
+; CHECK-NEXT:    ld.d $a1, $a1, %got_pc_lo12(g_490)
+; CHECK-NEXT:    ld.w $a2, $a0, 24
+; CHECK-NEXT:    pcalau12i $a3, %got_pc_hi20(g_813)
+; CHECK-NEXT:    ld.d $a3, $a3, %got_pc_lo12(g_813)
+; CHECK-NEXT:    st.w $zero, $a1, 0
+; CHECK-NEXT:    st.w $a2, $a3, 0
+; CHECK-NEXT:    vrepli.b $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a0, 32
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    st.w $zero, $a0, 20
+; CHECK-NEXT:    ret
+entry:
+  store i32 0, ptr getelementptr inbounds (i8, ptr @g_156, i64 20), align 4
+  store i32 0, ptr @g_490, align 4
+  %0 = load i32, ptr getelementptr inbounds (i8, ptr @g_156, i64 24), align 4
+  store i32 %0, ptr @g_813, align 4
+  tail call void @llvm.memset.p0.i64(ptr @g_156, i8 0, i64 48, i1 false)
+  store i32 0, ptr getelementptr inbounds (i8, ptr @g_156, i64 20), align 4
+  ret void
+}