[llvm] 594bfad - [LoongArch] Pre-commit for broadcast load (#136070)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 17 20:17:21 PDT 2025


Author: tangaac
Date: 2025-04-18T11:17:17+08:00
New Revision: 594bfadbd683eab5e1cd40a450c25da2834f7768

URL: https://github.com/llvm/llvm-project/commit/594bfadbd683eab5e1cd40a450c25da2834f7768
DIFF: https://github.com/llvm/llvm-project/commit/594bfadbd683eab5e1cd40a450c25da2834f7768.diff

LOG: [LoongArch] Pre-commit for broadcast load (#136070)

Added: 
    llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
    llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
new file mode 100644
index 0000000000000..4fcf016376d09
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+; TODO: Loading an element and splatting it to a vector could be lowered to xvldrepl
+
+; A load that has more than one user shouldn't be lowered to xvldrepl
+define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: should_not_be_optimized:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.d $xr0, $a0
+; CHECK-NEXT:    st.d $a0, $a1, 0
+; CHECK-NEXT:    ret
+  %tmp = load i64, ptr %ptr
+  store i64 %tmp, ptr %dst
+  %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
+  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
+  ret <4 x i64> %tmp2
+}
+
+define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_d_unaligned_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.d $a0, $a0, 4
+; CHECK-NEXT:    xvreplgr2vr.d $xr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i32, ptr %ptr, i32 1
+  %tmp = load i64, ptr %p
+  %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
+  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
+  ret <4 x i64> %tmp2
+}
+
+define <32 x i8> @xvldrepl_b(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
+  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> poison, <32 x i32> zeroinitializer
+  ret <32 x i8> %tmp2
+}
+
+define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_b_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 33
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i8, ptr %ptr, i64 33
+  %tmp = load i8, ptr %p
+  %tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
+  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> poison, <32 x i32> zeroinitializer
+  ret <32 x i8> %tmp2
+}
+
+
+define <16 x i16> @xvldrepl_h(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i16, ptr %ptr
+  %tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
+  %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp2
+}
+
+define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_h_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.h $a0, $a0, 66
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i16, ptr %ptr, i64 33
+  %tmp = load i16, ptr %p
+  %tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
+  %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp2
+}
+
+define <8 x i32> @xvldrepl_w(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i32, ptr %ptr
+  %tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
+  %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> poison, <8 x i32> zeroinitializer
+  ret <8 x i32> %tmp2
+}
+
+define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_w_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.w $a0, $a0, 132
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i32, ptr %ptr, i64 33
+  %tmp = load i32, ptr %p
+  %tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
+  %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> poison, <8 x i32> zeroinitializer
+  ret <8 x i32> %tmp2
+}
+
+
+define <4 x i64> @xvldrepl_d(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.d $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i64, ptr %ptr
+  %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
+  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
+  ret <4 x i64> %tmp2
+}
+
+define <4 x i64> @xvldrepl_d_offset(ptr %ptr) {
+; CHECK-LABEL: xvldrepl_d_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.d $a0, $a0, 264
+; CHECK-NEXT:    xvreplgr2vr.d $xr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i64, ptr %ptr, i64 33
+  %tmp = load i64, ptr %p
+  %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
+  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
+  ret <4 x i64> %tmp2
+}
+
+define <8 x float> @vldrepl_w_flt(ptr %ptr) {
+; CHECK-LABEL: vldrepl_w_flt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fld.s $fa0, $a0, 0
+; CHECK-NEXT:    xvreplve0.w $xr0, $xr0
+; CHECK-NEXT:    ret
+  %tmp = load float, ptr %ptr
+  %tmp1 = insertelement <8 x float> zeroinitializer, float %tmp, i32 0
+  %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> poison, <8 x i32> zeroinitializer
+  ret <8 x float> %tmp2
+}
+
+define <8 x float> @vldrepl_w_flt_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_w_flt_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fld.s $fa0, $a0, 264
+; CHECK-NEXT:    xvreplve0.w $xr0, $xr0
+; CHECK-NEXT:    ret
+  %p = getelementptr i64, ptr %ptr, i64 33
+  %tmp = load float, ptr %p
+  %tmp1 = insertelement <8 x float> zeroinitializer, float %tmp, i32 0
+  %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> poison, <8 x i32> zeroinitializer
+  ret <8 x float> %tmp2
+}
+
+define <4 x double> @vldrepl_d_dbl(ptr %ptr) {
+; CHECK-LABEL: vldrepl_d_dbl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fld.d $fa0, $a0, 0
+; CHECK-NEXT:    xvreplve0.d $xr0, $xr0
+; CHECK-NEXT:    ret
+  %tmp = load double, ptr %ptr
+  %tmp1 = insertelement <4 x double> zeroinitializer, double %tmp, i32 0
+  %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> poison, <4 x i32> zeroinitializer
+  ret <4 x double> %tmp2
+}
+
+define <4 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_d_dbl_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fld.d $fa0, $a0, 264
+; CHECK-NEXT:    xvreplve0.d $xr0, $xr0
+; CHECK-NEXT:    ret
+  %p = getelementptr i64, ptr %ptr, i64 33
+  %tmp = load double, ptr %p
+  %tmp1 = insertelement <4 x double> zeroinitializer, double %tmp, i32 0
+  %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> poison, <4 x i32> zeroinitializer
+  ret <4 x double> %tmp2
+}
+

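For context, the TODO above describes the optimization that a follow-up patch is expected to implement: when the loaded value has a single use and is splatted across all lanes, the ld.* + xvreplgr2vr.* pair can be folded into a single xvldrepl.* instruction. As an illustration only (this pre-commit deliberately records the current, unoptimized output, and the follow-up patch will regenerate the checks with update_llc_test_checks.py), the xvldrepl_d checks would presumably end up looking something like:

; CHECK-LABEL: xvldrepl_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvldrepl.d $xr0, $a0, 0
; CHECK-NEXT:    ret

The should_not_be_optimized case is expected to keep the ld.d + xvreplgr2vr.d sequence even then, because the loaded value also feeds the store and therefore has more than one user.
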
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
new file mode 100644
index 0000000000000..02b68725687dd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+; TODO: Loading an element and splatting it to a vector could be lowered to vldrepl
+
+; A load that has more than one user shouldn't be lowered to vldrepl
+define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: should_not_be_optimized:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.d $vr0, $a0
+; CHECK-NEXT:    st.d $a0, $a1, 0
+; CHECK-NEXT:    ret
+  %tmp = load i64, ptr %ptr
+  store i64 %tmp, ptr %dst
+  %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
+  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_d_unaligned_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.d $a0, $a0, 4
+; CHECK-NEXT:    vreplgr2vr.d $vr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i32, ptr %ptr, i32 1
+  %tmp = load i64, ptr %p
+  %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
+  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
+  ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vldrepl_b(ptr %ptr) {
+; CHECK-LABEL: vldrepl_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0
+  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> poison, <16 x i32> zeroinitializer
+  ret <16 x i8> %tmp2
+}
+
+define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_b_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 33
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i8, ptr %ptr, i64 33
+  %tmp = load i8, ptr %p
+  %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0
+  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> poison, <16 x i32> zeroinitializer
+  ret <16 x i8> %tmp2
+}
+
+
+define <8 x i16> @vldrepl_h(ptr %ptr) {
+; CHECK-LABEL: vldrepl_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i16, ptr %ptr
+  %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0
+  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_h_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.h $a0, $a0, 66
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i16, ptr %ptr, i64 33
+  %tmp = load i16, ptr %p
+  %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0
+  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vldrepl_w(ptr %ptr) {
+; CHECK-LABEL: vldrepl_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i32, ptr %ptr
+  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> zeroinitializer
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @vldrepl_w_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_w_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.w $a0, $a0, 132
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i32, ptr %ptr, i64 33
+  %tmp = load i32, ptr %p
+  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> zeroinitializer
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vldrepl_d(ptr %ptr) {
+; CHECK-LABEL: vldrepl_d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.d $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i64, ptr %ptr
+  %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
+  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
+  ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @vldrepl_d_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_d_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.d $a0, $a0, 264
+; CHECK-NEXT:    vreplgr2vr.d $vr0, $a0
+; CHECK-NEXT:    ret
+  %p = getelementptr i64, ptr %ptr, i64 33
+  %tmp = load i64, ptr %p
+  %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
+  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
+  ret <2 x i64> %tmp2
+}
+
+define <4 x float> @vldrepl_w_flt(ptr %ptr) {
+; CHECK-LABEL: vldrepl_w_flt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fld.s $fa0, $a0, 0
+; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+  %tmp = load float, ptr %ptr
+  %tmp1 = insertelement <4 x float> zeroinitializer, float %tmp, i32 0
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> poison, <4 x i32> zeroinitializer
+  ret <4 x float> %tmp2
+}
+
+define <4 x float> @vldrepl_w_flt_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_w_flt_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fld.s $fa0, $a0, 264
+; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+  %p = getelementptr i64, ptr %ptr, i64 33
+  %tmp = load float, ptr %p
+  %tmp1 = insertelement <4 x float> zeroinitializer, float %tmp, i32 0
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> poison, <4 x i32> zeroinitializer
+  ret <4 x float> %tmp2
+}
+
+define <2 x double> @vldrepl_d_dbl(ptr %ptr) {
+; CHECK-LABEL: vldrepl_d_dbl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fld.d $fa0, $a0, 0
+; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+  %tmp = load double, ptr %ptr
+  %tmp1 = insertelement <2 x double> zeroinitializer, double %tmp, i32 0
+  %tmp2 = shufflevector <2 x double> %tmp1, <2 x double> poison, <2 x i32> zeroinitializer
+  ret <2 x double> %tmp2
+}
+
+define <2 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
+; CHECK-LABEL: vldrepl_d_dbl_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fld.d $fa0, $a0, 264
+; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+  %p = getelementptr i64, ptr %ptr, i64 33
+  %tmp = load double, ptr %p
+  %tmp1 = insertelement <2 x double> zeroinitializer, double %tmp, i32 0
+  %tmp2 = shufflevector <2 x double> %tmp1, <2 x double> poison, <2 x i32> zeroinitializer
+  ret <2 x double> %tmp2
+}


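The LSX file mirrors the LASX one at 128-bit vector width. Under the same assumption as above, the follow-up patch would presumably fold the integer load-and-splat patterns into the vldrepl.{b,h,w,d} family instead of the current ld.* + vreplgr2vr.* pairs; for example, the vldrepl_w_offset checks might become something like:

; CHECK-LABEL: vldrepl_w_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vldrepl.w $vr0, $a0, 132
; CHECK-NEXT:    ret

Again, this is only a sketch of the intended direction; the actual check lines will be regenerated when the lowering change lands.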